From 5cb100b38a7bf5bc5e892fa46998ebcbab925f37 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Sun, 5 May 2013 20:03:40 +0800
Subject: [PATCH 001/287] KVM: add missing misc_deregister() on error in
 kvm_init()

Add the missing misc_deregister() before return from kvm_init()
in the debugfs init error handling case.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit afc2f792cdcb67f4257f0e68d10ee4a7b7eae57a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 302681c4aa44..b547a1ceecbc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3181,6 +3181,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
 out_undebugfs:
 	unregister_syscore_ops(&kvm_syscore_ops);
+	misc_deregister(&kvm_dev);
 out_unreg:
 	kvm_async_pf_deinit();
 out_free:

From 86882a4950ff8f054d0b89bb36655446a04b89c0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 14:31:01 +0100
Subject: [PATCH 002/287] ARM: KVM: move GIC/timer code to a common location

As KVM/arm64 is looming on the horizon, it makes sense to move some
of the common code to a single location in order to reduce duplication.

The code could live anywhere. Actually, most of KVM is already built
with a bunch of ugly ../../.. hacks in the various Makefiles, so we're
not exactly talking about style here. But maybe it is time to start
moving into a less ugly direction.

The include files must be in a "public" location, as they are accessed
from non-KVM files (arch/arm/kernel/asm-offsets.c).

For this purpose, introduce two new locations:
- virt/kvm/arm/ : x86 and ia64 already share the ioapic code in
  virt/kvm, so this could be seen as a (very ugly) precedent.
- include/kvm/  : there is already an include/xen, and while the
  intent is slightly different, this seems as good a location as
  any

Eventually, we should probably have independant Makefiles at every
levels (just like everywhere else in the kernel), but this is just
the first step.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 7275acdfe29ba03ad2f6e150386900c4e2d43fb1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h                            | 4 ++--
 arch/arm/kvm/Makefile                                      | 7 ++++---
 .../asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h   | 0
 arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h  | 0
 {arch/arm/kvm => virt/kvm/arm}/arch_timer.c                | 4 ++--
 {arch/arm/kvm => virt/kvm/arm}/vgic.c                      | 0
 6 files changed, 8 insertions(+), 7 deletions(-)
 rename arch/arm/include/asm/kvm_arch_timer.h => include/kvm/arm_arch_timer.h (100%)
 rename arch/arm/include/asm/kvm_vgic.h => include/kvm/arm_vgic.h (100%)
 rename {arch/arm/kvm => virt/kvm/arm}/arch_timer.c (99%)
 rename {arch/arm/kvm => virt/kvm/arm}/vgic.c (100%)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 57cb786a6203..ff5aaf10e6ec 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -23,7 +23,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_arch_timer.h>
 
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #define KVM_USER_MEM_SLOTS 32
@@ -38,7 +38,7 @@
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
 
-#include <asm/kvm_vgic.h>
+#include <kvm/arm_vgic.h>
 
 struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 53c5ed83d16f..9184a491d172 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -14,10 +14,11 @@ CFLAGS_mmu.o := -I.
 AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
-kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+KVM := ../../../virt/kvm
+kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o)
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
-obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/include/kvm/arm_arch_timer.h
similarity index 100%
rename from arch/arm/include/asm/kvm_arch_timer.h
rename to include/kvm/arm_arch_timer.h
diff --git a/arch/arm/include/asm/kvm_vgic.h b/include/kvm/arm_vgic.h
similarity index 100%
rename from arch/arm/include/asm/kvm_vgic.h
rename to include/kvm/arm_vgic.h
diff --git a/arch/arm/kvm/arch_timer.c b/virt/kvm/arm/arch_timer.c
similarity index 99%
rename from arch/arm/kvm/arch_timer.c
rename to virt/kvm/arm/arch_timer.c
index c55b6089e923..2d00b2925780 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,8 +25,8 @@
 #include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
 
-#include <asm/kvm_vgic.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
 
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
diff --git a/arch/arm/kvm/vgic.c b/virt/kvm/arm/vgic.c
similarity index 100%
rename from arch/arm/kvm/vgic.c
rename to virt/kvm/arm/vgic.c

From 7ce8a35ca784be99a50520476c1a34aad2755b7a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 14:31:02 +0100
Subject: [PATCH 003/287] KVM: get rid of $(addprefix ../../../virt/kvm/, ...)
 in Makefiles

As requested by the KVM maintainers, remove the addprefix used to
refer to the main KVM code from the arch code, and replace it with
a KVM variable that does the same thing.

Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Christoffer Dall <cdall@cs.columbia.edu>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Alexander Graf <agraf@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 535cf7b3b13c7faed3dfabafb6598417de1129ca)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Makefile     |  2 +-
 arch/ia64/kvm/Makefile    |  7 ++++---
 arch/powerpc/kvm/Makefile | 13 +++++++------
 arch/s390/kvm/Makefile    |  3 ++-
 arch/x86/kvm/Makefile     | 13 +++++++------
 5 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 9184a491d172..d99bee4950e5 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(addprefix $(KVM)/, kvm_main.o coalesced_mmio.o)
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 1a4053789d01..18e45ec49bbf 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file)
 
 ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+KVM := ../../../virt/kvm
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o)
+common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+		$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
 
 ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
+common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f4d46c..008cd856c5b5 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,9 +5,10 @@
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
-						eventfd.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+		$(KVM)/eventfd.o
 
 CFLAGS_44x_tlb.o  := -I.
 CFLAGS_e500_mmu.o := -I.
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \
 kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
-	../../../virt/kvm/coalesced_mmio.o \
+	$(KVM)/coalesced_mmio.o \
 	fpu.o \
 	book3s_paired_singles.o \
 	book3s_pr.o \
@@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
 	book3s_xics.o
 
 kvm-book3s_64-module-objs := \
-	../../../virt/kvm/kvm_main.o \
-	../../../virt/kvm/eventfd.o \
+	$(KVM)/kvm_main.o \
+	$(KVM)/eventfd.o \
 	powerpc.o \
 	emulate.o \
 	book3s.o \
@@ -111,7 +112,7 @@ kvm-book3s_32-objs := \
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
 kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
 
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 8fe9d65a4585..40b4c6470f88 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,8 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d609e1d84048..bf4fb04d0112 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -5,12 +5,13 @@ CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
 CFLAGS_vmx.o := -I.
 
-kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-				coalesced_mmio.o irq_comm.o eventfd.o \
-				irqchip.o)
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(addprefix ../../../virt/kvm/, \
-				assigned-dev.o iommu.o)
-kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
+KVM := ../../../virt/kvm
+
+kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+				$(KVM)/eventfd.o $(KVM)/irqchip.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
+kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o cpuid.o pmu.o

From 9861d210e02d5fb6ffcd5ea992f62043105813b3 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 26 Mar 2013 13:41:35 +0000
Subject: [PATCH 004/287] ARM: KVM: arch_timers: zero CNTVOFF upon return to
 host

To use the virtual counters from the host, we need to ensure that
CNTVOFF doesn't change unexpectedly. When we change to a guest, we
replace the host's CNTVOFF, but we don't restore it when returning to
the host.

As the host sets CNTVOFF to zero, and never changes it, we can simply
zero CNTVOFF when returning to the host. This patch adds said zeroing to
the return to host path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Acked-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit f793c23ebbe5afd1cabf4a42a3a297022213756f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts_head.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 2b44b95a86dd..6f18695a09cb 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -503,6 +503,10 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
 
+	@ Ensure host CNTVCT == CNTPCT
+	mov	r2, #0
+	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
+
 1:
 #endif
 	@ Allow physical timer/counter access for the host

From 16d32e6b5aad12c2566cd8c6918e1be3ecac06f5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Dec 2012 18:40:43 +0000
Subject: [PATCH 005/287] arm64: KVM: HYP mode idmap support

Add the necessary infrastructure for identity-mapped HYP page
tables. Idmap-ed code must be in the ".hyp.idmap.text" linker
section.

The rest of the HYP ends up in ".hyp.text".

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 2240bbb697354f5617d95e3ee104ca61bb812507)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kernel/vmlinux.lds.S | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index ce2d97255ba9..55d0e035205f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -17,6 +17,19 @@ ENTRY(stext)
 
 jiffies = jiffies_64;
 
+#define HYPERVISOR_TEXT					\
+	/*						\
+	 * Force the alignment to be compatible with	\
+	 * the vectors requirements			\
+	 */						\
+	. = ALIGN(2048);				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;	\
+	*(.hyp.idmap.text)				\
+	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;	\
+	VMLINUX_SYMBOL(__hyp_text_start) = .;		\
+	*(.hyp.text)					\
+	VMLINUX_SYMBOL(__hyp_text_end) = .;
+
 SECTIONS
 {
 	/*
@@ -48,6 +61,7 @@ SECTIONS
 			TEXT_TEXT
 			SCHED_TEXT
 			LOCK_TEXT
+			HYPERVISOR_TEXT
 			*(.fixup)
 			*(.gnu.warning)
 		. = ALIGN(16);
@@ -102,3 +116,9 @@ SECTIONS
 	STABS_DEBUG
 	.comment 0 : { *(.comment) }
 }
+
+/*
+ * The HYP init code can't be more than a page long.
+ */
+ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end),
+       "HYP init code too big")

From a51ca39794b462370c0b4f46b360370f55303454 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 10:46:47 +0000
Subject: [PATCH 006/287] arm64: KVM: EL2 register definitions

Define all the useful bitfields for EL2 registers.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0369f6a34b9facd16eea4236518ca6f9cbc9e5ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h | 245 +++++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_arm.h

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
new file mode 100644
index 000000000000..a5f28e2720c7
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_ARM_H__
+#define __ARM64_KVM_ARM_H__
+
+#include <asm/types.h>
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_ID		(UL(1) << 33)
+#define HCR_CD		(UL(1) << 32)
+#define HCR_RW_SHIFT	31
+#define HCR_RW		(UL(1) << HCR_RW_SHIFT)
+#define HCR_TRVM	(UL(1) << 30)
+#define HCR_HCD		(UL(1) << 29)
+#define HCR_TDZ		(UL(1) << 28)
+#define HCR_TGE		(UL(1) << 27)
+#define HCR_TVM		(UL(1) << 26)
+#define HCR_TTLB	(UL(1) << 25)
+#define HCR_TPU		(UL(1) << 24)
+#define HCR_TPC		(UL(1) << 23)
+#define HCR_TSW		(UL(1) << 22)
+#define HCR_TAC		(UL(1) << 21)
+#define HCR_TIDCP	(UL(1) << 20)
+#define HCR_TSC		(UL(1) << 19)
+#define HCR_TID3	(UL(1) << 18)
+#define HCR_TID2	(UL(1) << 17)
+#define HCR_TID1	(UL(1) << 16)
+#define HCR_TID0	(UL(1) << 15)
+#define HCR_TWE		(UL(1) << 14)
+#define HCR_TWI		(UL(1) << 13)
+#define HCR_DC		(UL(1) << 12)
+#define HCR_BSU		(3 << 10)
+#define HCR_BSU_IS	(UL(1) << 10)
+#define HCR_FB		(UL(1) << 9)
+#define HCR_VA		(UL(1) << 8)
+#define HCR_VI		(UL(1) << 7)
+#define HCR_VF		(UL(1) << 6)
+#define HCR_AMO		(UL(1) << 5)
+#define HCR_IMO		(UL(1) << 4)
+#define HCR_FMO		(UL(1) << 3)
+#define HCR_PTW		(UL(1) << 2)
+#define HCR_SWIO	(UL(1) << 1)
+#define HCR_VM		(UL(1) << 0)
+
+/*
+ * The bits we set in HCR:
+ * RW:		64bit by default, can be overriden for 32bit VMs
+ * TAC:		Trap ACTLR
+ * TSC:		Trap SMC
+ * TSW:		Trap cache operations by set/way
+ * TWI:		Trap WFI
+ * TIDCP:	Trap L2CTLR/L2ECTLR
+ * BSU_IS:	Upgrade barriers to the inner shareable domain
+ * FB:		Force broadcast of all maintainance operations
+ * AMO:		Override CPSR.A and enable signaling with VA
+ * IMO:		Override CPSR.I and enable signaling with VI
+ * FMO:		Override CPSR.F and enable signaling with VF
+ * SWIO:	Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
+			 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+			 HCR_SWIO | HCR_TIDCP | HCR_RW)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+
+/* Hyp System Control Register (SCTLR_EL2) bits */
+#define SCTLR_EL2_EE	(1 << 25)
+#define SCTLR_EL2_WXN	(1 << 19)
+#define SCTLR_EL2_I	(1 << 12)
+#define SCTLR_EL2_SA	(1 << 3)
+#define SCTLR_EL2_C	(1 << 2)
+#define SCTLR_EL2_A	(1 << 1)
+#define SCTLR_EL2_M	1
+#define SCTLR_EL2_FLAGS	(SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C |	\
+			 SCTLR_EL2_SA | SCTLR_EL2_I)
+
+/* TCR_EL2 Registers bits */
+#define TCR_EL2_TBI	(1 << 20)
+#define TCR_EL2_PS	(7 << 16)
+#define TCR_EL2_PS_40B	(2 << 16)
+#define TCR_EL2_TG0	(1 << 14)
+#define TCR_EL2_SH0	(3 << 12)
+#define TCR_EL2_ORGN0	(3 << 10)
+#define TCR_EL2_IRGN0	(3 << 8)
+#define TCR_EL2_T0SZ	0x3f
+#define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
+			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+
+#define TCR_EL2_FLAGS	(TCR_EL2_PS_40B)
+
+/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_PS_MASK	(7 << 16)
+#define VTCR_EL2_PS_40B		(2 << 16)
+#define VTCR_EL2_TG0_MASK	(1 << 14)
+#define VTCR_EL2_TG0_4K		(0 << 14)
+#define VTCR_EL2_TG0_64K	(1 << 14)
+#define VTCR_EL2_SH0_MASK	(3 << 12)
+#define VTCR_EL2_SH0_INNER	(3 << 12)
+#define VTCR_EL2_ORGN0_MASK	(3 << 10)
+#define VTCR_EL2_ORGN0_WBWA	(1 << 10)
+#define VTCR_EL2_IRGN0_MASK	(3 << 8)
+#define VTCR_EL2_IRGN0_WBWA	(1 << 8)
+#define VTCR_EL2_SL0_MASK	(3 << 6)
+#define VTCR_EL2_SL0_LVL1	(1 << 6)
+#define VTCR_EL2_T0SZ_MASK	0x3f
+#define VTCR_EL2_T0SZ_40B	24
+
+#ifdef CONFIG_ARM64_64K_PAGES
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input  (T0SZ = 24)
+ * 64kB pages (TG0 = 1)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
+				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
+				 VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
+#else
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input  (T0SZ = 24)
+ * 4kB pages (TG0 = 0)
+ * 3 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
+				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
+				 VTCR_EL2_T0SZ_40B)
+#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
+#endif
+
+#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
+#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (48LLU)
+#define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
+
+/* Hyp System Trap Register */
+#define HSTR_EL2_TTEE	(1 << 16)
+#define HSTR_EL2_T(x)	(1 << x)
+
+/* Hyp Coprocessor Trap Register */
+#define CPTR_EL2_TCPAC	(1 << 31)
+#define CPTR_EL2_TTA	(1 << 20)
+#define CPTR_EL2_TFP	(1 << 10)
+
+/* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TDRA		(1 << 11)
+#define MDCR_EL2_TDOSA		(1 << 10)
+#define MDCR_EL2_TDA		(1 << 9)
+#define MDCR_EL2_TDE		(1 << 8)
+#define MDCR_EL2_HPME		(1 << 7)
+#define MDCR_EL2_TPM		(1 << 6)
+#define MDCR_EL2_TPMCR		(1 << 5)
+#define MDCR_EL2_HPMN_MASK	(0x1F)
+
+/* Exception Syndrome Register (ESR) bits */
+#define ESR_EL2_EC_SHIFT	(26)
+#define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
+#define ESR_EL2_IL		(1U << 25)
+#define ESR_EL2_ISS		(ESR_EL2_IL - 1)
+#define ESR_EL2_ISV_SHIFT	(24)
+#define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)
+#define ESR_EL2_SAS_SHIFT	(22)
+#define ESR_EL2_SAS		(3U << ESR_EL2_SAS_SHIFT)
+#define ESR_EL2_SSE		(1 << 21)
+#define ESR_EL2_SRT_SHIFT	(16)
+#define ESR_EL2_SRT_MASK	(0x1f << ESR_EL2_SRT_SHIFT)
+#define ESR_EL2_SF 		(1 << 15)
+#define ESR_EL2_AR 		(1 << 14)
+#define ESR_EL2_EA 		(1 << 9)
+#define ESR_EL2_CM 		(1 << 8)
+#define ESR_EL2_S1PTW 		(1 << 7)
+#define ESR_EL2_WNR		(1 << 6)
+#define ESR_EL2_FSC		(0x3f)
+#define ESR_EL2_FSC_TYPE	(0x3c)
+
+#define ESR_EL2_CV_SHIFT	(24)
+#define ESR_EL2_CV		(1U << ESR_EL2_CV_SHIFT)
+#define ESR_EL2_COND_SHIFT	(20)
+#define ESR_EL2_COND		(0xfU << ESR_EL2_COND_SHIFT)
+
+
+#define FSC_FAULT	(0x04)
+#define FSC_PERM	(0x0c)
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK	(~0xFUL)
+
+#define ESR_EL2_EC_UNKNOWN	(0x00)
+#define ESR_EL2_EC_WFI		(0x01)
+#define ESR_EL2_EC_CP15_32	(0x03)
+#define ESR_EL2_EC_CP15_64	(0x04)
+#define ESR_EL2_EC_CP14_MR	(0x05)
+#define ESR_EL2_EC_CP14_LS	(0x06)
+#define ESR_EL2_EC_FP_ASIMD	(0x07)
+#define ESR_EL2_EC_CP10_ID	(0x08)
+#define ESR_EL2_EC_CP14_64	(0x0C)
+#define ESR_EL2_EC_ILL_ISS	(0x0E)
+#define ESR_EL2_EC_SVC32	(0x11)
+#define ESR_EL2_EC_HVC32	(0x12)
+#define ESR_EL2_EC_SMC32	(0x13)
+#define ESR_EL2_EC_SVC64	(0x15)
+#define ESR_EL2_EC_HVC64	(0x16)
+#define ESR_EL2_EC_SMC64	(0x17)
+#define ESR_EL2_EC_SYS64	(0x18)
+#define ESR_EL2_EC_IABT		(0x20)
+#define ESR_EL2_EC_IABT_HYP	(0x21)
+#define ESR_EL2_EC_PC_ALIGN	(0x22)
+#define ESR_EL2_EC_DABT		(0x24)
+#define ESR_EL2_EC_DABT_HYP	(0x25)
+#define ESR_EL2_EC_SP_ALIGN	(0x26)
+#define ESR_EL2_EC_FP_EXC32	(0x28)
+#define ESR_EL2_EC_FP_EXC64	(0x2C)
+#define ESR_EL2_EC_SERRROR	(0x2F)
+#define ESR_EL2_EC_BREAKPT	(0x30)
+#define ESR_EL2_EC_BREAKPT_HYP	(0x31)
+#define ESR_EL2_EC_SOFTSTP	(0x32)
+#define ESR_EL2_EC_SOFTSTP_HYP	(0x33)
+#define ESR_EL2_EC_WATCHPT	(0x34)
+#define ESR_EL2_EC_WATCHPT_HYP	(0x35)
+#define ESR_EL2_EC_BKPT32	(0x38)
+#define ESR_EL2_EC_VECTOR32	(0x3A)
+#define ESR_EL2_EC_BRK64	(0x3C)
+
+#define ESR_EL2_EC_xABT_xFSR_EXTABT	0x10
+
+#endif /* __ARM64_KVM_ARM_H__ */

From 44848d746b7f29803f7ac9579d594f368a1cad11 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 11:16:40 +0000
Subject: [PATCH 007/287] arm64: KVM: system register definitions for 64bit
 guests

Define the saved/restored registers for 64bit guests.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit fd9fc9f73cc2070d2637a7ee082800a817fd45f3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h | 68 ++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_asm.h

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
new file mode 100644
index 000000000000..591ac219964a
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+/*
+ * 0 is reserved as an invalid value.
+ * Order *must* be kept in sync with the hyp switch code.
+ */
+#define	MPIDR_EL1	1	/* MultiProcessor Affinity Register */
+#define	CSSELR_EL1	2	/* Cache Size Selection Register */
+#define	SCTLR_EL1	3	/* System Control Register */
+#define	ACTLR_EL1	4	/* Auxilliary Control Register */
+#define	CPACR_EL1	5	/* Coprocessor Access Control */
+#define	TTBR0_EL1	6	/* Translation Table Base Register 0 */
+#define	TTBR1_EL1	7	/* Translation Table Base Register 1 */
+#define	TCR_EL1		8	/* Translation Control Register */
+#define	ESR_EL1		9	/* Exception Syndrome Register */
+#define	AFSR0_EL1	10	/* Auxilary Fault Status Register 0 */
+#define	AFSR1_EL1	11	/* Auxilary Fault Status Register 1 */
+#define	FAR_EL1		12	/* Fault Address Register */
+#define	MAIR_EL1	13	/* Memory Attribute Indirection Register */
+#define	VBAR_EL1	14	/* Vector Base Address Register */
+#define	CONTEXTIDR_EL1	15	/* Context ID Register */
+#define	TPIDR_EL0	16	/* Thread ID, User R/W */
+#define	TPIDRRO_EL0	17	/* Thread ID, User R/O */
+#define	TPIDR_EL1	18	/* Thread ID, Privileged */
+#define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
+#define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+#define	NR_SYS_REGS	21
+
+#define ARM_EXCEPTION_IRQ	  0
+#define ARM_EXCEPTION_TRAP	  1
+
+#ifndef __ASSEMBLY__
+struct kvm;
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_vector[];
+
+extern char __kvm_hyp_code_start[];
+extern char __kvm_hyp_code_end[];
+
+extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+#endif
+
+#endif /* __ARM_KVM_ASM_H__ */

From b04c4cdfff4b701f01f1dae2ada92b580cc281d5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 13:27:52 +0000
Subject: [PATCH 008/287] arm64: KVM: Basic ESR_EL2 helpers and vcpu register
 access

Implements helpers for dealing with the EL2 syndrome register as
well as accessing the vcpu registers.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 83a4979483c8e597b69d4403794f87fea51fa549)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h | 158 +++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_emulate.h

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
new file mode 100644
index 000000000000..6c1725e93b0b
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/kvm_emulate.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_EMULATE_H__
+#define __ARM64_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmio.h>
+#include <asm/ptrace.h>
+
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
+static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
+}
+
+static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
+}
+
+static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+}
+
+static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+{
+	return false;	/* 32bit? Bahhh... */
+}
+
+static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+{
+	return true;	/* No conditionals on arm64 */
+}
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	*vcpu_pc(vcpu) += 4;
+}
+
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
+
+/* Get vcpu SPSR for current mode */
+static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+{
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
+{
+	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+
+	return mode != PSR_MODE_EL0t;
+}
+
+static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.esr_el2;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.fault.far_el2;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+{
+	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV);
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR);
+}
+
+static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE);
+}
+
+static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+{
+	return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA);
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW);
+}
+
+static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+{
+	return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT);
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+{
+	return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL);
+}
+
+static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
+}
+
+#endif /* __ARM64_KVM_EMULATE_H__ */

From fbd17d89d0b567e7c5c12955648cf728f0c04169 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 17 Dec 2012 12:27:42 +0000
Subject: [PATCH 009/287] arm64: KVM: fault injection into a guest

Implement the injection of a fault (undefined, data abort or
prefetch abort) into a 64bit guest.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit aa8eff9bfbd531e0fcc8e68052f4ac545cd004c5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/inject_fault.c | 126 ++++++++++++++++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 arch/arm64/kvm/inject_fault.c

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 000000000000..54f656271266
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,126 @@
+/*
+ * Fault injection for 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/esr.h>
+
+#define PSTATE_FAULT_BITS_64 	(PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
+				 PSR_I_BIT | PSR_D_BIT)
+#define EL1_EXCEPT_SYNC_OFFSET	0x200
+
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_aarch32;
+	u32 esr = 0;
+
+	is_aarch32 = vcpu_mode_is_32bit(vcpu);
+
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+
+	/*
+	 * Build an {i,d}abort, depending on the level and the
+	 * instruction set. Report an external synchronous abort.
+	 */
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		esr |= ESR_EL1_IL;
+
+	/*
+	 * Here, the guest runs in AArch64 mode when in EL1. If we get
+	 * an AArch32 fault, it means we managed to trap an EL0 fault.
+	 */
+	if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+		esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT);
+	else
+		esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT);
+
+	if (!is_iabt)
+		esr |= ESR_EL1_EC_DABT_EL0;
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT;
+}
+
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT);
+
+	*vcpu_spsr(vcpu) = cpsr;
+	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+	/*
+	 * Build an unknown exception, depending on the instruction
+	 * set.
+	 */
+	if (kvm_vcpu_trap_il_is32bit(vcpu))
+		esr |= ESR_EL1_IL;
+
+	vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+	inject_undef64(vcpu);
+}

From bef26a85389996c6f9c79f09d2ba09da0cd46351 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 15:35:24 +0000
Subject: [PATCH 010/287] arm64: KVM: architecture specific MMU backend

Define the arm64 specific MMU backend:
- HYP/kernel VA offset
- S2 4/64kB definitions
- S2 page table populating and flushing
- icache cleaning

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 37c437532b0126d1df5685080db9cecf3d918175)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_mmu.h | 135 +++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_mmu.h

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
new file mode 100644
index 000000000000..efe609c6a3c9
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMU_H__
+#define __ARM64_KVM_MMU_H__
+
+#include <asm/page.h>
+#include <asm/memory.h>
+
+/*
+ * As we only have the TTBR0_EL2 register, we cannot express
+ * "negative" addresses. This makes it impossible to directly share
+ * mappings with the kernel.
+ *
+ * Instead, give the HYP mode its own VA region at a fixed offset from
+ * the kernel by just masking the top bits (which are all ones for a
+ * kernel address).
+ */
+#define HYP_PAGE_OFFSET_SHIFT	VA_BITS
+#define HYP_PAGE_OFFSET_MASK	((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
+#define HYP_PAGE_OFFSET		(PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
+
+/*
+ * Our virtual mapping for the idmap-ed MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the last
+ * possible page, where no kernel mapping will ever exist.
+ */
+#define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Convert a kernel VA into a HYP VA.
+ * reg: VA to be converted.
+ */
+.macro kern_hyp_va	reg
+	and	\reg, \reg, #HYP_PAGE_OFFSET_MASK
+.endm
+
+#else
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+
+#define KERN_TO_HYP(kva)	((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
+
+/*
+ * Align KVM with the kernel's view of physical memory. Should be
+ * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ */
+#define KVM_PHYS_SHIFT	PHYS_MASK_SHIFT
+#define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
+
+/* Make sure we get the right size, and thus the right alignment */
+#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
+#define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+
+int create_hyp_mappings(void *from, void *to);
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
+
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+			  phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
+phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
+int kvm_mmu_init(void);
+void kvm_clear_hyp_idmap(void);
+
+#define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
+
+static inline bool kvm_is_write_fault(unsigned long esr)
+{
+	unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
+
+	if (esr_ec == ESR_EL2_EC_IABT)
+		return false;
+
+	if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
+		return false;
+
+	return true;
+}
+
+static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
+static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
+static inline void kvm_clean_pte(pte_t *pte) {}
+static inline void kvm_clean_pte_entry(pte_t *pte) {}
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+	pte_val(*pte) |= PTE_S2_RDWR;
+}
+
+struct kvm;
+
+static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+{
+	if (!icache_is_aliasing()) {		/* PIPT */
+		unsigned long hva = gfn_to_hva(kvm, gfn);
+		flush_icache_range(hva, hva + PAGE_SIZE);
+	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
+		/* any kind of VIPT cache */
+		__flush_icache_all();
+	}
+}
+
+#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ARM64_KVM_MMU_H__ */

From becdc5b63f089f0664fc1381fa168b7a7dce09dc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:29:28 +0000
Subject: [PATCH 011/287] arm64: KVM: user space interface

Provide the kvm.h file that defines the user space visible
interface.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 54f81d0eb93896da73d1636bca84cf90f52cabdf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/uapi/asm/kvm.h | 117 ++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 arch/arm64/include/uapi/asm/kvm.h

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 000000000000..4e64570a20c9
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1	0
+#define KVM_NR_SPSR	1
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+
+#define KVM_REG_SIZE(id)						\
+	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+struct kvm_regs {
+	struct user_pt_regs regs;	/* sp = sp_el0 */
+
+	__u64	sp_el1;
+	__u64	elr_el1;
+
+	__u64	spsr[KVM_NR_SPSR];
+
+	struct user_fpsimd_state fp_regs;
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_AEM_V8		0
+#define KVM_ARM_TARGET_FOUNDATION_V8	1
+#define KVM_ARM_TARGET_CORTEX_A57	2
+
+#define KVM_ARM_NUM_TARGETS		3
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT	0
+#define KVM_ARM_DEVICE_TYPE_MASK	(0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT		16
+#define KVM_ARM_DEVICE_ID_MASK		(0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2		0
+
+/* Supported VGIC address types  */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST	0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU	1
+
+#define KVM_VGIC_V2_DIST_SIZE		0x1000
+#define KVM_VGIC_V2_CPU_SIZE		0x2000
+
+struct kvm_vcpu_init {
+	__u32 target;
+	__u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT		24
+#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT		16
+#define KVM_ARM_IRQ_VCPU_MASK		0xff
+#define KVM_ARM_IRQ_NUM_SHIFT		0
+#define KVM_ARM_IRQ_NUM_MASK		0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU		0
+#define KVM_ARM_IRQ_TYPE_SPI		1
+#define KVM_ARM_IRQ_TYPE_PPI		2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ		0
+#define KVM_ARM_IRQ_CPU_FIQ		1
+
+/* Highest supported SPI, from VGIC_NR_IRQS */
+#define KVM_ARM_IRQ_GIC_MAX		127
+
+#endif
+
+#endif /* __ARM_KVM_H__ */

From 068d803462e03f87953424ea6f3910578f3f293e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:15:34 +0000
Subject: [PATCH 012/287] arm64: KVM: system register handling

Provide 64bit system register handling, modeled after the cp15
handling for ARM.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 7c8c5e6a9101ea57a1c2c9faff0917e79251a21e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_coproc.h |  51 ++
 arch/arm64/include/uapi/asm/kvm.h   |  29 +
 arch/arm64/kvm/sys_regs.c           | 883 ++++++++++++++++++++++++++++
 arch/arm64/kvm/sys_regs.h           | 138 +++++
 include/uapi/linux/kvm.h            |   1 +
 5 files changed, 1102 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_coproc.h
 create mode 100644 arch/arm64/kvm/sys_regs.c
 create mode 100644 arch/arm64/kvm/sys_regs.h

diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
new file mode 100644
index 000000000000..9b4477acb554
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_coproc.h
+ * Copyright (C) 2012 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_COPROC_H__
+#define __ARM64_KVM_COPROC_H__
+
+#include <linux/kvm_host.h>
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+struct kvm_sys_reg_table {
+	const struct sys_reg_desc *table;
+	size_t num;
+};
+
+struct kvm_sys_reg_target_table {
+	struct kvm_sys_reg_table table64;
+};
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table);
+
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#define kvm_coproc_table_init kvm_sys_reg_table_init
+void kvm_sys_reg_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 4e64570a20c9..ebac919dc0ca 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -92,6 +92,35 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT	16
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / sizeof(__u32))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX		(0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK	0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT	8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR	(0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK	0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT	0
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG		(0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK	0x000000000000c000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT	14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK	0x0000000000003800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT	11
+#define KVM_REG_ARM64_SYSREG_CRN_MASK	0x0000000000000780
+#define KVM_REG_ARM64_SYSREG_CRN_SHIFT	7
+#define KVM_REG_ARM64_SYSREG_CRM_MASK	0x0000000000000078
+#define KVM_REG_ARM64_SYSREG_CRM_SHIFT	3
+#define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
+#define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
new file mode 100644
index 000000000000..52fff0ae3442
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.c
@@ -0,0 +1,883 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <trace/events/kvm.h>
+
+#include "sys_regs.h"
+
+/*
+ * All of this file is extremly similar to the ARM coproc.c, but the
+ * types are different. My gut feeling is that it should be pretty
+ * easy to merge, but that would be an ABI breakage -- again. VFP
+ * would also need to be abstracted.
+ */
+
+/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
+/* Which cache CCSIDR represents depends on CSSELR value. */
+static u32 get_ccsidr(u32 csselr)
+{
+	u32 ccsidr;
+
+	/* Make sure noone else changes CSSELR during this! */
+	local_irq_disable();
+	/* Put value into CSSELR */
+	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+	isb();
+	/* Read result out of CCSIDR */
+	asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+	local_irq_enable();
+
+	return ccsidr;
+}
+
+static void do_dc_cisw(u32 val)
+{
+	asm volatile("dc cisw, %x0" : : "r" (val));
+	dsb();
+}
+
+static void do_dc_csw(u32 val)
+{
+	asm volatile("dc csw, %x0" : : "r" (val));
+	dsb();
+}
+
+/* See note at ARM ARM B1.14.4 */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	unsigned long val;
+	int cpu;
+
+	if (!p->is_write)
+		return read_from_write_only(vcpu, p);
+
+	cpu = get_cpu();
+
+	cpumask_setall(&vcpu->arch.require_dcache_flush);
+	cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
+
+	/* If we were already preempted, take the long way around */
+	if (cpu != vcpu->arch.last_pcpu) {
+		flush_cache_all();
+		goto done;
+	}
+
+	val = *vcpu_reg(vcpu, p->Rt);
+
+	switch (p->CRm) {
+	case 6:			/* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+	case 14:		/* DCCISW */
+		do_dc_cisw(val);
+		break;
+
+	case 10:		/* DCCSW */
+		do_dc_csw(val);
+		break;
+	}
+
+done:
+	put_cpu();
+
+	return true;
+}
+
+/*
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters.  Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ */
+static bool pm_fake(struct kvm_vcpu *vcpu,
+		    const struct sys_reg_params *p,
+		    const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+	else
+		return read_zero(vcpu, p);
+}
+
+static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 amair;
+
+	asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
+	vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+}
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	/*
+	 * Simply map the vcpu_id into the Aff0 field of the MPIDR.
+	 */
+	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
+}
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc sys_reg_descs[] = {
+	/* DC ISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
+	  access_dcsw },
+	/* DC CSW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
+	  access_dcsw },
+	/* DC CISW */
+	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
+	  access_dcsw },
+
+	/* MPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
+	  NULL, reset_mpidr, MPIDR_EL1 },
+	/* SCTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+	/* CPACR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
+	  NULL, reset_val, CPACR_EL1, 0 },
+	/* TTBR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, TTBR0_EL1 },
+	/* TTBR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
+	  NULL, reset_unknown, TTBR1_EL1 },
+	/* TCR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
+	  NULL, reset_val, TCR_EL1, 0 },
+
+	/* AFSR0_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
+	  NULL, reset_unknown, AFSR0_EL1 },
+	/* AFSR1_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
+	  NULL, reset_unknown, AFSR1_EL1 },
+	/* ESR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
+	  NULL, reset_unknown, ESR_EL1 },
+	/* FAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, FAR_EL1 },
+
+	/* PMINTENSET_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
+	  pm_fake },
+	/* PMINTENCLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
+	  pm_fake },
+
+	/* MAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
+	  NULL, reset_unknown, MAIR_EL1 },
+	/* AMAIR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
+	  NULL, reset_amair_el1, AMAIR_EL1 },
+
+	/* VBAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, VBAR_EL1, 0 },
+	/* CONTEXTIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
+	  NULL, reset_val, CONTEXTIDR_EL1, 0 },
+	/* TPIDR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
+	  NULL, reset_unknown, TPIDR_EL1 },
+
+	/* CNTKCTL_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
+	  NULL, reset_val, CNTKCTL_EL1, 0},
+
+	/* CSSELR_EL1 */
+	{ Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, CSSELR_EL1 },
+
+	/* PMCR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
+	  pm_fake },
+	/* PMCNTENSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
+	  pm_fake },
+	/* PMCNTENCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
+	  pm_fake },
+	/* PMOVSCLR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
+	  pm_fake },
+	/* PMSWINC_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
+	  pm_fake },
+	/* PMSELR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
+	  pm_fake },
+	/* PMCEID0_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
+	  pm_fake },
+	/* PMCEID1_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
+	  pm_fake },
+	/* PMCCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
+	  pm_fake },
+	/* PMXEVTYPER_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
+	  pm_fake },
+	/* PMXEVCNTR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
+	  pm_fake },
+	/* PMUSERENR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
+	  pm_fake },
+	/* PMOVSSET_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
+	  pm_fake },
+
+	/* TPIDR_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
+	  NULL, reset_unknown, TPIDR_EL0 },
+	/* TPIDRRO_EL0 */
+	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
+	  NULL, reset_unknown, TPIDRRO_EL0 },
+};
+
+/* Target specific emulation tables */
+static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+				       struct kvm_sys_reg_target_table *table)
+{
+	target_tables[target] = table;
+}
+
+/* Get specific register table for this target. */
+static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num)
+{
+	struct kvm_sys_reg_target_table *table;
+
+	table = target_tables[target];
+	*num = table->table64.num;
+	return table->table64.table;
+}
+
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+					 const struct sys_reg_desc table[],
+					 unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		const struct sys_reg_desc *r = &table[i];
+
+		if (params->Op0 != r->Op0)
+			continue;
+		if (params->Op1 != r->Op1)
+			continue;
+		if (params->CRn != r->CRn)
+			continue;
+		if (params->CRm != r->CRm)
+			continue;
+		if (params->Op2 != r->Op2)
+			continue;
+
+		return r;
+	}
+	return NULL;
+}
+
+static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *params)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+
+	table = get_target_table(vcpu->arch.target, &num);
+
+	/* Search target-specific then generic table. */
+	r = find_reg(params, table, num);
+	if (!r)
+		r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	if (likely(r)) {
+		/*
+		 * Not having an accessor means that we have
+		 * configured a trap that we don't know how to
+		 * handle. This certainly qualifies as a gross bug
+		 * that should be fixed right away.
+		 */
+		BUG_ON(!r->access);
+
+		if (likely(r->access(vcpu, params, r))) {
+			/* Skip instruction, since it was emulated */
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+		/* If access function fails, it should complain. */
+	} else {
+		kvm_err("Unsupported guest sys_reg access at: %lx\n",
+			*vcpu_pc(vcpu));
+		print_sys_reg_instr(params);
+	}
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
+			      const struct sys_reg_desc *table, size_t num)
+{
+	unsigned long i;
+
+	for (i = 0; i < num; i++)
+		if (table[i].reset)
+			table[i].reset(vcpu, &table[i]);
+}
+
+/**
+ * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct sys_reg_params params;
+	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+
+	params.Op0 = (esr >> 20) & 3;
+	params.Op1 = (esr >> 14) & 0x7;
+	params.CRn = (esr >> 10) & 0xf;
+	params.CRm = (esr >> 1) & 0xf;
+	params.Op2 = (esr >> 17) & 0x7;
+	params.Rt = (esr >> 5) & 0x1f;
+	params.is_write = !(esr & 1);
+
+	return emulate_sys_reg(vcpu, &params);
+}
+
+/******************************************************************************
+ * Userspace API
+ *****************************************************************************/
+
+static bool index_to_params(u64 id, struct sys_reg_params *params)
+{
+	switch (id & KVM_REG_SIZE_MASK) {
+	case KVM_REG_SIZE_U64:
+		/* Any unused index bits means it's not valid. */
+		if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+			      | KVM_REG_ARM_COPROC_MASK
+			      | KVM_REG_ARM64_SYSREG_OP0_MASK
+			      | KVM_REG_ARM64_SYSREG_OP1_MASK
+			      | KVM_REG_ARM64_SYSREG_CRN_MASK
+			      | KVM_REG_ARM64_SYSREG_CRM_MASK
+			      | KVM_REG_ARM64_SYSREG_OP2_MASK))
+			return false;
+		params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+		params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+		params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
+			       >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+		params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
+			       >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+		params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
+			       >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Decode an index value, and find the sys_reg_desc entry. */
+static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
+						    u64 id)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+	struct sys_reg_params params;
+
+	/* We only do sys_reg for now. */
+	if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
+		return NULL;
+
+	if (!index_to_params(id, &params))
+		return NULL;
+
+	table = get_target_table(vcpu->arch.target, &num);
+	r = find_reg(&params, table, num);
+	if (!r)
+		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	/* Not saved in the sys_reg array? */
+	if (r && !r->reg)
+		r = NULL;
+
+	return r;
+}
+
+/*
+ * These are the invariant sys_reg registers: we let the guest see the
+ * host versions of these, so they're part of the guest state.
+ *
+ * A future CPU may provide a mechanism to present different values to
+ * the guest, or a future kvm may trap them.
+ */
+
+#define FUNCTION_INVARIANT(reg)						\
+	static void get_##reg(struct kvm_vcpu *v,			\
+			      const struct sys_reg_desc *r)		\
+	{								\
+		u64 val;						\
+									\
+		asm volatile("mrs %0, " __stringify(reg) "\n"		\
+			     : "=r" (val));				\
+		((struct sys_reg_desc *)r)->val = val;			\
+	}
+
+FUNCTION_INVARIANT(midr_el1)
+FUNCTION_INVARIANT(ctr_el0)
+FUNCTION_INVARIANT(revidr_el1)
+FUNCTION_INVARIANT(id_pfr0_el1)
+FUNCTION_INVARIANT(id_pfr1_el1)
+FUNCTION_INVARIANT(id_dfr0_el1)
+FUNCTION_INVARIANT(id_afr0_el1)
+FUNCTION_INVARIANT(id_mmfr0_el1)
+FUNCTION_INVARIANT(id_mmfr1_el1)
+FUNCTION_INVARIANT(id_mmfr2_el1)
+FUNCTION_INVARIANT(id_mmfr3_el1)
+FUNCTION_INVARIANT(id_isar0_el1)
+FUNCTION_INVARIANT(id_isar1_el1)
+FUNCTION_INVARIANT(id_isar2_el1)
+FUNCTION_INVARIANT(id_isar3_el1)
+FUNCTION_INVARIANT(id_isar4_el1)
+FUNCTION_INVARIANT(id_isar5_el1)
+FUNCTION_INVARIANT(clidr_el1)
+FUNCTION_INVARIANT(aidr_el1)
+
+/* ->val is filled in by kvm_sys_reg_table_init() */
+static struct sys_reg_desc invariant_sys_regs[] = {
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, get_midr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
+	  NULL, get_revidr_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  NULL, get_id_pfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
+	  NULL, get_id_pfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
+	  NULL, get_id_dfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
+	  NULL, get_id_afr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
+	  NULL, get_id_mmfr0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
+	  NULL, get_id_mmfr1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
+	  NULL, get_id_mmfr2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
+	  NULL, get_id_mmfr3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  NULL, get_id_isar0_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
+	  NULL, get_id_isar1_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  NULL, get_id_isar2_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
+	  NULL, get_id_isar3_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
+	  NULL, get_id_isar4_el1 },
+	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
+	  NULL, get_id_isar5_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_clidr_el1 },
+	{ Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
+	  NULL, get_aidr_el1 },
+	{ Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
+	  NULL, get_ctr_el0 },
+};
+
+static int reg_from_user(void *val, const void __user *uaddr, u64 id)
+{
+	/* This Just Works because we are little endian. */
+	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int reg_to_user(void __user *uaddr, const void *val, u64 id)
+{
+	/* This Just Works because we are little endian. */
+	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
+		return -EFAULT;
+	return 0;
+}
+
+static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	struct sys_reg_params params;
+	const struct sys_reg_desc *r;
+
+	if (!index_to_params(id, &params))
+		return -ENOENT;
+
+	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+	if (!r)
+		return -ENOENT;
+
+	return reg_to_user(uaddr, &r->val, id);
+}
+
+static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+	struct sys_reg_params params;
+	const struct sys_reg_desc *r;
+	int err;
+	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+
+	if (!index_to_params(id, &params))
+		return -ENOENT;
+	r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+	if (!r)
+		return -ENOENT;
+
+	err = reg_from_user(&val, uaddr, id);
+	if (err)
+		return err;
+
+	/* This is what we mean by invariant: you can't change it. */
+	if (r->val != val)
+		return -EINVAL;
+
+	return 0;
+}
+
+static bool is_valid_cache(u32 val)
+{
+	u32 level, ctype;
+
+	if (val >= CSSELR_MAX)
+		return -ENOENT;
+
+	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
+	level = (val >> 1);
+	ctype = (cache_levels >> (level * 3)) & 7;
+
+	switch (ctype) {
+	case 0: /* No cache */
+		return false;
+	case 1: /* Instruction cache only */
+		return (val & 1);
+	case 2: /* Data cache only */
+	case 4: /* Unified cache */
+		return !(val & 1);
+	case 3: /* Separate instruction and data caches */
+		return true;
+	default: /* Reserved: we can't know instruction or data. */
+		return false;
+	}
+}
+
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+	u32 val;
+	u32 __user *uval = uaddr;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+		if (KVM_REG_SIZE(id) != 4)
+			return -ENOENT;
+		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+		if (!is_valid_cache(val))
+			return -ENOENT;
+
+		return put_user(get_ccsidr(val), uval);
+	default:
+		return -ENOENT;
+	}
+}
+
+static int demux_c15_set(u64 id, void __user *uaddr)
+{
+	u32 val, newval;
+	u32 __user *uval = uaddr;
+
+	/* Fail if we have unknown bits set. */
+	if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+		   | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+		return -ENOENT;
+
+	switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+	case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+		if (KVM_REG_SIZE(id) != 4)
+			return -ENOENT;
+		val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+			>> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+		if (!is_valid_cache(val))
+			return -ENOENT;
+
+		if (get_user(newval, uval))
+			return -EFAULT;
+
+		/* This is also invariant: you can't change it. */
+		if (newval != get_ccsidr(val))
+			return -EINVAL;
+		return 0;
+	default:
+		return -ENOENT;
+	}
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	const struct sys_reg_desc *r;
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_get(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	r = index_to_sys_reg_desc(vcpu, reg->id);
+	if (!r)
+		return get_invariant_sys_reg(reg->id, uaddr);
+
+	return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+}
+
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	const struct sys_reg_desc *r;
+	void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+		return demux_c15_set(reg->id, uaddr);
+
+	if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+		return -ENOENT;
+
+	r = index_to_sys_reg_desc(vcpu, reg->id);
+	if (!r)
+		return set_invariant_sys_reg(reg->id, uaddr);
+
+	return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+}
+
+static unsigned int num_demux_regs(void)
+{
+	unsigned int i, count = 0;
+
+	for (i = 0; i < CSSELR_MAX; i++)
+		if (is_valid_cache(i))
+			count++;
+
+	return count;
+}
+
+static int write_demux_regids(u64 __user *uindices)
+{
+	u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+	unsigned int i;
+
+	val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
+	for (i = 0; i < CSSELR_MAX; i++) {
+		if (!is_valid_cache(i))
+			continue;
+		if (put_user(val | i, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+	return 0;
+}
+
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
+{
+	return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
+		KVM_REG_ARM64_SYSREG |
+		(reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
+		(reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
+		(reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
+		(reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
+		(reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT));
+}
+
+static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
+{
+	if (!*uind)
+		return true;
+
+	if (put_user(sys_reg_to_index(reg), *uind))
+		return false;
+
+	(*uind)++;
+	return true;
+}
+
+/* Assumed ordered tables, see kvm_sys_reg_table_init. */
+static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
+{
+	const struct sys_reg_desc *i1, *i2, *end1, *end2;
+	unsigned int total = 0;
+	size_t num;
+
+	/* We check for duplicates here, to allow arch-specific overrides. */
+	i1 = get_target_table(vcpu->arch.target, &num);
+	end1 = i1 + num;
+	i2 = sys_reg_descs;
+	end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
+
+	BUG_ON(i1 == end1 || i2 == end2);
+
+	/* Walk carefully, as both tables may refer to the same register. */
+	while (i1 || i2) {
+		int cmp = cmp_sys_reg(i1, i2);
+		/* target-specific overrides generic entry. */
+		if (cmp <= 0) {
+			/* Ignore registers we trap but don't save. */
+			if (i1->reg) {
+				if (!copy_reg_to_user(i1, &uind))
+					return -EFAULT;
+				total++;
+			}
+		} else {
+			/* Ignore registers we trap but don't save. */
+			if (i2->reg) {
+				if (!copy_reg_to_user(i2, &uind))
+					return -EFAULT;
+				total++;
+			}
+		}
+
+		if (cmp <= 0 && ++i1 == end1)
+			i1 = NULL;
+		if (cmp >= 0 && ++i2 == end2)
+			i2 = NULL;
+	}
+	return total;
+}
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
+{
+	return ARRAY_SIZE(invariant_sys_regs)
+		+ num_demux_regs()
+		+ walk_sys_regs(vcpu, (u64 __user *)NULL);
+}
+
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int i;
+	int err;
+
+	/* Then give them all the invariant registers' indices. */
+	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
+		if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	err = walk_sys_regs(vcpu, uindices);
+	if (err < 0)
+		return err;
+	uindices += err;
+
+	return write_demux_regids(uindices);
+}
+
+void kvm_sys_reg_table_init(void)
+{
+	unsigned int i;
+	struct sys_reg_desc clidr;
+
+	/* Make sure tables are unique and in order. */
+	for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++)
+		BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0);
+
+	/* We abuse the reset function to overwrite the table itself. */
+	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
+		invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+
+	/*
+	 * CLIDR format is awkward, so clean it up.  See ARM B4.1.20:
+	 *
+	 *   If software reads the Cache Type fields from Ctype1
+	 *   upwards, once it has seen a value of 0b000, no caches
+	 *   exist at further-out levels of the hierarchy. So, for
+	 *   example, if Ctype3 is the first Cache Type field with a
+	 *   value of 0b000, the values of Ctype4 to Ctype7 must be
+	 *   ignored.
+	 */
+	get_clidr_el1(NULL, &clidr); /* Ugly... */
+	cache_levels = clidr.val;
+	for (i = 0; i < 7; i++)
+		if (((cache_levels >> (i*3)) & 7) == 0)
+			break;
+	/* Clear all higher bits. */
+	cache_levels &= (1 << (i*3))-1;
+}
+
+/**
+ * kvm_reset_sys_regs - sets system registers to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
+{
+	size_t num;
+	const struct sys_reg_desc *table;
+
+	/* Catch someone adding a register without putting in reset entry. */
+	memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
+
+	/* Generic chip reset first (so target could override). */
+	reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+	table = get_target_table(vcpu->arch.target, &num);
+	reset_sys_reg_descs(vcpu, table, num);
+
+	for (num = 1; num < NR_SYS_REGS; num++)
+		if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+			panic("Didn't reset vcpu_sys_reg(%zi)", num);
+}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
new file mode 100644
index 000000000000..d50d3722998e
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
+#define __ARM64_KVM_SYS_REGS_LOCAL_H__
+
+struct sys_reg_params {
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+	u8	Rt;
+	bool	is_write;
+};
+
+struct sys_reg_desc {
+	/* MRS/MSR instruction which accesses it. */
+	u8	Op0;
+	u8	Op1;
+	u8	CRn;
+	u8	CRm;
+	u8	Op2;
+
+	/* Trapped access from guest, if non-NULL. */
+	bool (*access)(struct kvm_vcpu *,
+		       const struct sys_reg_params *,
+		       const struct sys_reg_desc *);
+
+	/* Initialization for vcpu. */
+	void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
+
+	/* Index into sys_reg[], or 0 if we don't need to save it. */
+	int reg;
+
+	/* Value (usually reset value) */
+	u64 val;
+};
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+	/* Look, we even formatted it for you to paste into the table! */
+	kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+		      p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+				const struct sys_reg_params *p)
+{
+	return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+			     const struct sys_reg_params *p)
+{
+	*vcpu_reg(vcpu, p->Rt) = 0;
+	return true;
+}
+
+static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
+				      const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg write to read-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
+					const struct sys_reg_params *params)
+{
+	kvm_debug("sys_reg read to write-only register at: %lx\n",
+		  *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	return false;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	BUG_ON(!r->reg);
+	BUG_ON(r->reg >= NR_SYS_REGS);
+	vcpu_sys_reg(vcpu, r->reg) = r->val;
+}
+
+static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
+			      const struct sys_reg_desc *i2)
+{
+	BUG_ON(i1 == i2);
+	if (!i1)
+		return 1;
+	else if (!i2)
+		return -1;
+	if (i1->Op0 != i2->Op0)
+		return i1->Op0 - i2->Op0;
+	if (i1->Op1 != i2->Op1)
+		return i1->Op1 - i2->Op1;
+	if (i1->CRn != i2->CRn)
+		return i1->CRn - i2->CRn;
+	if (i1->CRm != i2->CRm)
+		return i1->CRm - i2->CRm;
+	return i1->Op2 - i2->Op2;
+}
+
+
+#define Op0(_x) 	.Op0 = _x
+#define Op1(_x) 	.Op1 = _x
+#define CRn(_x)		.CRn = _x
+#define CRm(_x) 	.CRm = _x
+#define Op2(_x) 	.Op2 = _x
+
+#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d88c8ee00c8b..97277d333e82 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -783,6 +783,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_IA64		0x3000000000000000ULL
 #define KVM_REG_ARM		0x4000000000000000ULL
 #define KVM_REG_S390		0x5000000000000000ULL
+#define KVM_REG_ARM64		0x6000000000000000ULL
 #define KVM_REG_MIPS		0x7000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT	52

From 3107348ebf0b00722a27b7a4a32c1f4c34fdb1e2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 17:30:48 +0000
Subject: [PATCH 013/287] arm64: KVM: CPU specific system registers handling

Add the support code for CPU specific system registers. Not much
here yet.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b990a9d3152bddca62cc1f8bf80518430b98737b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs_generic_v8.c | 85 ++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 arch/arm64/kvm/sys_regs_generic_v8.c

diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
new file mode 100644
index 000000000000..d4e803907312
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <linux/init.h>
+
+#include "sys_regs.h"
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+	return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+	u64 actlr;
+
+	asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
+	vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+}
+
+/*
+ * Implementation specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc genericv8_sys_regs[] = {
+	/* ACTLR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr, reset_actlr, ACTLR_EL1 },
+};
+
+static struct kvm_sys_reg_target_table genericv8_target_table = {
+	.table64 = {
+		.table = genericv8_sys_regs,
+		.num = ARRAY_SIZE(genericv8_sys_regs),
+	},
+};
+
+static int __init sys_reg_genericv8_init(void)
+{
+	unsigned int i;
+
+	for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
+		BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
+			       &genericv8_sys_regs[i]) >= 0);
+
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
+					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
+					  &genericv8_target_table);
+	return 0;
+}
+late_initcall(sys_reg_genericv8_init);

From cac664305a439463255eb7954ecd375b39f4299f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:23:59 +0000
Subject: [PATCH 014/287] arm64: KVM: virtual CPU reset

Provide the reset code for a virtual CPU booted in 64bit mode.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f4672752c321ea36ce099cebdd7a082a8f327505)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/reset.c | 76 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 arch/arm64/kvm/reset.c

diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
new file mode 100644
index 000000000000..f6536a06231a
--- /dev/null
+++ b/arch/arm64/kvm/reset.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/reset.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include <asm/cputype.h>
+#include <asm/ptrace.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+
+/*
+ * ARMv8 Reset Values
+ */
+static const struct kvm_regs default_regs_reset = {
+	.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
+			PSR_F_BIT | PSR_D_BIT),
+};
+
+int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+	int r;
+
+	switch (ext) {
+	default:
+		r = 0;
+	}
+
+	return r;
+}
+
+/**
+ * kvm_reset_vcpu - sets core registers and sys_regs to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on
+ * the virtual CPU struct to their architectually defined reset
+ * values.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+	const struct kvm_regs *cpu_reset;
+
+	switch (vcpu->arch.target) {
+	default:
+		cpu_reset = &default_regs_reset;
+		break;
+	}
+
+	/* Reset core registers */
+	memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+
+	/* Reset system registers */
+	kvm_reset_sys_regs(vcpu);
+
+	return 0;
+}

From 913d79134dee46fc407ad2f3819bb5dce2f47e38 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:29:28 +0000
Subject: [PATCH 015/287] arm64: KVM: kvm_arch and kvm_vcpu_arch definitions

Provide the architecture dependent structures for VM and
vcpu abstractions.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 4f8d6632ec71372a3b8dbb4775662c2c9025d173)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 186 ++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_host.h

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
new file mode 100644
index 000000000000..4a2622f5e81f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_host.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HOST_H__
+#define __ARM64_KVM_HOST_H__
+
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
+
+#define KVM_MAX_VCPUS 4
+#define KVM_USER_MEM_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
+
+#define KVM_VCPU_MAX_FEATURES 0
+
+/* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x)	0
+#define KVM_NR_PAGE_SIZES	1
+#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
+
+struct kvm_vcpu;
+int kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+int kvm_arch_dev_ioctl_check_extension(long ext);
+
+struct kvm_arch {
+	/* The VMID generation used for the virt. memory system */
+	u64    vmid_gen;
+	u32    vmid;
+
+	/* 1-level 2nd stage table and lock */
+	spinlock_t pgd_lock;
+	pgd_t *pgd;
+
+	/* VTTBR value associated with above pgd and vmid */
+	u64    vttbr;
+
+	/* Interrupt controller */
+	struct vgic_dist	vgic;
+
+	/* Timer */
+	struct arch_timer_kvm	timer;
+};
+
+#define KVM_NR_MEM_OBJS     40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+	int nobjs;
+	void *objects[KVM_NR_MEM_OBJS];
+};
+
+struct kvm_vcpu_fault_info {
+	u32 esr_el2;		/* Hyp Syndrom Register */
+	u64 far_el2;		/* Hyp Fault Address Register */
+	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
+};
+
+struct kvm_cpu_context {
+	struct kvm_regs	gp_regs;
+	u64 sys_regs[NR_SYS_REGS];
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
+
+struct kvm_vcpu_arch {
+	struct kvm_cpu_context ctxt;
+
+	/* HYP configuration */
+	u64 hcr_el2;
+
+	/* Exception Information */
+	struct kvm_vcpu_fault_info fault;
+
+	/* Pointer to host CPU context */
+	kvm_cpu_context_t *host_cpu_context;
+
+	/* VGIC state */
+	struct vgic_cpu vgic_cpu;
+	struct arch_timer_cpu timer_cpu;
+
+	/*
+	 * Anything that is not used directly from assembly code goes
+	 * here.
+	 */
+	/* dcache set/way operation pending */
+	int last_pcpu;
+	cpumask_t require_dcache_flush;
+
+	/* Don't run the guest */
+	bool pause;
+
+	/* IO related fields */
+	struct kvm_decode mmio_decode;
+
+	/* Interrupt related fields */
+	u64 irq_lines;		/* IRQ and FIQ levels */
+
+	/* Cache some mmu pages needed inside spinlock regions */
+	struct kvm_mmu_memory_cache mmu_page_cache;
+
+	/* Target CPU and feature flags */
+	u32 target;
+	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+
+	/* Detect first run of a vcpu */
+	bool has_run_once;
+};
+
+#define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
+#define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
+#define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
+
+struct kvm_vm_stat {
+	u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+	u32 halt_wakeup;
+};
+
+struct kvm_vcpu_init;
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			const struct kvm_vcpu_init *init);
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+struct kvm_one_reg;
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+			unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+
+u64 kvm_call_hyp(void *hypfn, ...);
+
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		int exception_index);
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
+#endif /* __ARM64_KVM_HOST_H__ */

From 56a8fbf4b8d368562273336cf8f43068aaed91a3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:29:50 +0000
Subject: [PATCH 016/287] arm64: KVM: MMIO access backend

Define the necessary structures to perform an MMIO access.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit d7246bf3571a82834984a42db52261525bc11159)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_mmio.h | 59 +++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 arch/arm64/include/asm/kvm_mmio.h

diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
new file mode 100644
index 000000000000..fc2f689c0694
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMIO_H__
+#define __ARM64_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * This is annoying. The mmio code requires this, even if we don't
+ * need any decoding. To be fixed.
+ */
+struct kvm_decode {
+	unsigned long rt;
+	bool sign_extend;
+};
+
+/*
+ * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_mmio {
+	phys_addr_t	phys_addr;
+	u8		data[8];
+	u32		len;
+	bool		is_write;
+};
+
+static inline void kvm_prepare_mmio(struct kvm_run *run,
+				    struct kvm_exit_mmio *mmio)
+{
+	run->mmio.phys_addr	= mmio->phys_addr;
+	run->mmio.len		= mmio->len;
+	run->mmio.is_write	= mmio->is_write;
+	memcpy(run->mmio.data, mmio->data, mmio->len);
+	run->exit_reason	= KVM_EXIT_MMIO;
+}
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		 phys_addr_t fault_ipa);
+
+#endif	/* __ARM64_KVM_MMIO_H__ */

From f98733fb49495d20251f55dc1ddff88f5761a797 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:37:02 +0000
Subject: [PATCH 017/287] arm64: KVM: guest one-reg interface

Let userspace play with the guest registers.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 2f4a07c5f9fe4a5cdb9867e1e2fcab3165846ea7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/guest.c | 259 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 259 insertions(+)
 create mode 100644 arch/arm64/kvm/guest.c

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 000000000000..3d7518a7ebaa
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputype.h>
+#include <asm/uaccess.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+	return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/*
+	 * Because the kvm_regs structure is a mix of 32, 64 and
+	 * 128bit fields, we index it as if it was a 32bit
+	 * array. Hence below, nr_regs is the number of entries, and
+	 * off the index in the "array".
+	 */
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	u32 off;
+
+	/* Our ID is an index into the kvm_regs struct. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+		return -ENOENT;
+
+	if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	__u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+	struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+	int nr_regs = sizeof(*regs) / sizeof(__u32);
+	__uint128_t tmp;
+	void *valp = &tmp;
+	u64 off;
+	int err = 0;
+
+	/* Our ID is an index into the kvm_regs struct. */
+	off = core_reg_offset_from_id(reg->id);
+	if (off >= nr_regs ||
+	    (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+		return -ENOENT;
+
+	if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
+		return -EINVAL;
+
+	if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+		u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
+		switch (mode) {
+		case PSR_MODE_EL0t:
+		case PSR_MODE_EL1t:
+		case PSR_MODE_EL1h:
+			break;
+		default:
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+out:
+	return err;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+	return sizeof(struct kvm_regs) / sizeof(__u32);
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we apppend system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	unsigned int i;
+	const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+
+	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+		if (put_user(core_reg | i, uindices))
+			return -EFAULT;
+		uindices++;
+	}
+
+	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we want a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return get_core_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_get_reg(vcpu, reg);
+}
+
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	/* We currently use nothing arch-specific in upper 32 bits */
+	if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+		return -EINVAL;
+
+	/* Register group 16 means we set a core register. */
+	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+		return set_core_reg(vcpu, reg);
+
+	return kvm_arm_sys_reg_set_reg(vcpu, reg);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+	unsigned long implementor = read_cpuid_implementor();
+	unsigned long part_number = read_cpuid_part_number();
+
+	if (implementor != ARM_CPU_IMP_ARM)
+		return -EINVAL;
+
+	switch (part_number) {
+	case ARM_CPU_PART_AEM_V8:
+		return KVM_ARM_TARGET_AEM_V8;
+	case ARM_CPU_PART_FOUNDATION:
+		return KVM_ARM_TARGET_FOUNDATION_V8;
+	case ARM_CPU_PART_CORTEX_A57:
+		/* Currently handled by the generic backend */
+		return KVM_ARM_TARGET_CORTEX_A57;
+	default:
+		return -EINVAL;
+	}
+}
+
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	vcpu->arch.target = phys_target;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		if (init->features[i / 32] & (1 << (i % 32))) {
+			if (i >= KVM_VCPU_MAX_FEATURES)
+				return -ENOENT;
+			set_bit(i, vcpu->arch.features);
+		}
+	}
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL;
+}

From f756a17fc0ab844b88c1282cf5df2ffb03185825 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 17 Dec 2012 17:07:52 +0000
Subject: [PATCH 018/287] arm64: KVM: hypervisor initialization code

Provide EL2 with page tables and stack, and set the vectors
to point to the full blown world-switch code.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 092bd143cbb481b4ce1d55247a2987eaaf61f967)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  13 ++++
 arch/arm64/kvm/hyp-init.S         | 107 ++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp-init.S

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4a2622f5e81f..2500eb6a4d2a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -183,4 +183,17 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
+				       unsigned long hyp_stack_ptr,
+				       unsigned long vector_ptr)
+{
+	/*
+	 * Call initialization code, and switch to the full blown
+	 * HYP code.
+	 */
+	kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+		     hyp_stack_ptr, vector_ptr);
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
new file mode 100644
index 000000000000..ba84e6705e20
--- /dev/null
+++ b/arch/arm64/kvm/hyp-init.S
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.idmap.text, "ax"
+
+	.align	11
+
+ENTRY(__kvm_hyp_init)
+	ventry	__invalid		// Synchronous EL2t
+	ventry	__invalid		// IRQ EL2t
+	ventry	__invalid		// FIQ EL2t
+	ventry	__invalid		// Error EL2t
+
+	ventry	__invalid		// Synchronous EL2h
+	ventry	__invalid		// IRQ EL2h
+	ventry	__invalid		// FIQ EL2h
+	ventry	__invalid		// Error EL2h
+
+	ventry	__do_hyp_init		// Synchronous 64-bit EL1
+	ventry	__invalid		// IRQ 64-bit EL1
+	ventry	__invalid		// FIQ 64-bit EL1
+	ventry	__invalid		// Error 64-bit EL1
+
+	ventry	__invalid		// Synchronous 32-bit EL1
+	ventry	__invalid		// IRQ 32-bit EL1
+	ventry	__invalid		// FIQ 32-bit EL1
+	ventry	__invalid		// Error 32-bit EL1
+
+__invalid:
+	b	.
+
+	/*
+	 * x0: HYP boot pgd
+	 * x1: HYP pgd
+	 * x2: HYP stack
+	 * x3: HYP vectors
+	 */
+__do_hyp_init:
+
+	msr	ttbr0_el2, x0
+
+	mrs	x4, tcr_el1
+	ldr	x5, =TCR_EL2_MASK
+	and	x4, x4, x5
+	ldr	x5, =TCR_EL2_FLAGS
+	orr	x4, x4, x5
+	msr	tcr_el2, x4
+
+	ldr	x4, =VTCR_EL2_FLAGS
+	msr	vtcr_el2, x4
+
+	mrs	x4, mair_el1
+	msr	mair_el2, x4
+	isb
+
+	mov	x4, #SCTLR_EL2_FLAGS
+	msr	sctlr_el2, x4
+	isb
+
+	/* MMU is now enabled. Get ready for the trampoline dance */
+	ldr	x4, =TRAMPOLINE_VA
+	adr	x5, target
+	bfi	x4, x5, #0, #PAGE_SHIFT
+	br	x4
+
+target: /* We're now in the trampoline code, switch page tables */
+	msr	ttbr0_el2, x1
+	isb
+
+	/* Invalidate the old TLBs */
+	tlbi	alle2
+	dsb	sy
+
+	/* Set the stack and new vectors */
+	kern_hyp_va	x2
+	mov	sp, x2
+	kern_hyp_va	x3
+	msr	vbar_el2, x3
+
+	/* Hello, World! */
+	eret
+ENDPROC(__kvm_hyp_init)
+
+	.ltorg
+
+	.popsection

From 066bddb132227937d1f68dc5ec4554786e393345 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:40:18 +0000
Subject: [PATCH 019/287] arm64: KVM: HYP mode world switch implementation

The HYP mode world switch in all its glory.

Implements save/restore of host/guest registers, EL2 trapping,
IPA resolution, and additional services (tlb invalidation).

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 55c7401d92e16360e0987afe39355f1eb6300f31)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kernel/asm-offsets.c |  33 ++
 arch/arm64/kvm/hyp.S            | 617 ++++++++++++++++++++++++++++++++
 2 files changed, 650 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp.S

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a2a4d810bea3..49c162c03b69 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -104,5 +104,38 @@ int main(void)
   BLANK();
   DEFINE(TZ_MINWEST,		offsetof(struct timezone, tz_minuteswest));
   DEFINE(TZ_DSTTIME,		offsetof(struct timezone, tz_dsttime));
+  BLANK();
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_CONTEXT,		offsetof(struct kvm_vcpu, arch.ctxt));
+  DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
+  DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
+  DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
+  DEFINE(CPU_SP_EL1,		offsetof(struct kvm_regs, sp_el1));
+  DEFINE(CPU_ELR_EL1,		offsetof(struct kvm_regs, elr_el1));
+  DEFINE(CPU_SPSR,		offsetof(struct kvm_regs, spsr));
+  DEFINE(CPU_SYSREGS,		offsetof(struct kvm_cpu_context, sys_regs));
+  DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
+  DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
+  DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
+  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
+  DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
+  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
+  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
+  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
+  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
+  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
+  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
+  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
+  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
+  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
+  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
+  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
+  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
+#endif
   return 0;
 }
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 000000000000..0b18c2e1e043
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,617 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SPSR_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
+#define CPU_SYSREG_OFFSET(x)	(CPU_SYSREGS + 8*x)
+
+	.text
+	.pushsection	.hyp.text, "ax"
+	.align	PAGE_SHIFT
+
+__kvm_hyp_code_start:
+	.globl __kvm_hyp_code_start
+
+.macro save_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	stp	x19, x20, [x3]
+	stp	x21, x22, [x3, #16]
+	stp	x23, x24, [x3, #32]
+	stp	x25, x26, [x3, #48]
+	stp	x27, x28, [x3, #64]
+	stp	x29, lr, [x3, #80]
+
+	mrs	x19, sp_el0
+	mrs	x20, elr_el2		// EL1 PC
+	mrs	x21, spsr_el2		// EL1 pstate
+
+	stp	x19, x20, [x3, #96]
+	str	x21, [x3, #112]
+
+	mrs	x22, sp_el1
+	mrs	x23, elr_el1
+	mrs	x24, spsr_el1
+
+	str	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	str	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	str	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+.endm
+
+.macro restore_common_regs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+	ldr	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+	ldr	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+
+	msr	sp_el1, x22
+	msr	elr_el1, x23
+	msr	spsr_el1, x24
+
+	add	x3, x2, #CPU_XREG_OFFSET(31)    // SP_EL0
+	ldp	x19, x20, [x3]
+	ldr	x21, [x3, #16]
+
+	msr	sp_el0, x19
+	msr	elr_el2, x20 				// EL1 PC
+	msr	spsr_el2, x21 				// EL1 pstate
+
+	add	x3, x2, #CPU_XREG_OFFSET(19)
+	ldp	x19, x20, [x3]
+	ldp	x21, x22, [x3, #16]
+	ldp	x23, x24, [x3, #32]
+	ldp	x25, x26, [x3, #48]
+	ldp	x27, x28, [x3, #64]
+	ldp	x29, lr, [x3, #80]
+.endm
+
+.macro save_host_regs
+	save_common_regs
+.endm
+
+.macro restore_host_regs
+	restore_common_regs
+.endm
+
+.macro save_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_save x3, 4
+.endm
+
+.macro restore_fpsimd
+	// x2: cpu context address
+	// x3, x4: tmp regs
+	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	fpsimd_restore x3, 4
+.endm
+
+.macro save_guest_regs
+	// x0 is the vcpu address
+	// x1 is the return code, do not corrupt!
+	// x2 is the cpu context
+	// x3 is a tmp register
+	// Guest's x0-x3 are on the stack
+
+	// Compute base to save registers
+	add	x3, x2, #CPU_XREG_OFFSET(4)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	str	x18, [x3, #112]
+
+	pop	x6, x7			// x2, x3
+	pop	x4, x5			// x0, x1
+
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	save_common_regs
+.endm
+
+.macro restore_guest_regs
+	// x0 is the vcpu address.
+	// x2 is the cpu context
+	// x3 is a tmp register
+
+	// Prepare x0-x3 for later restore
+	add	x3, x2, #CPU_XREG_OFFSET(0)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	push	x4, x5		// Push x0-x3 on the stack
+	push	x6, x7
+
+	// x4-x18
+	ldp	x4, x5, [x3, #32]
+	ldp	x6, x7, [x3, #48]
+	ldp	x8, x9, [x3, #64]
+	ldp	x10, x11, [x3, #80]
+	ldp	x12, x13, [x3, #96]
+	ldp	x14, x15, [x3, #112]
+	ldp	x16, x17, [x3, #128]
+	ldr	x18, [x3, #144]
+
+	// x19-x29, lr, sp*, elr*, spsr*
+	restore_common_regs
+
+	// Last bits of the 64bit state
+	pop	x2, x3
+	pop	x0, x1
+
+	// Do not touch any register after this!
+.endm
+
+/*
+ * Macros to perform system register save/restore.
+ *
+ * Ordering here is absolutely critical, and must be kept consistent
+ * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
+ * and in kvm_asm.h.
+ *
+ * In other words, don't touch any of these unless you know what
+ * you are doing.
+ */
+.macro save_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	mrs	x4,	vmpidr_el2
+	mrs	x5,	csselr_el1
+	mrs	x6,	sctlr_el1
+	mrs	x7,	actlr_el1
+	mrs	x8,	cpacr_el1
+	mrs	x9,	ttbr0_el1
+	mrs	x10,	ttbr1_el1
+	mrs	x11,	tcr_el1
+	mrs	x12,	esr_el1
+	mrs	x13, 	afsr0_el1
+	mrs	x14,	afsr1_el1
+	mrs	x15,	far_el1
+	mrs	x16,	mair_el1
+	mrs	x17,	vbar_el1
+	mrs	x18,	contextidr_el1
+	mrs	x19,	tpidr_el0
+	mrs	x20,	tpidrro_el0
+	mrs	x21,	tpidr_el1
+	mrs	x22, 	amair_el1
+	mrs	x23, 	cntkctl_el1
+
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+	stp	x8, x9, [x3, #32]
+	stp	x10, x11, [x3, #48]
+	stp	x12, x13, [x3, #64]
+	stp	x14, x15, [x3, #80]
+	stp	x16, x17, [x3, #96]
+	stp	x18, x19, [x3, #112]
+	stp	x20, x21, [x3, #128]
+	stp	x22, x23, [x3, #144]
+.endm
+
+.macro restore_sysregs
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	ldp	x8, x9, [x3, #32]
+	ldp	x10, x11, [x3, #48]
+	ldp	x12, x13, [x3, #64]
+	ldp	x14, x15, [x3, #80]
+	ldp	x16, x17, [x3, #96]
+	ldp	x18, x19, [x3, #112]
+	ldp	x20, x21, [x3, #128]
+	ldp	x22, x23, [x3, #144]
+
+	msr	vmpidr_el2,	x4
+	msr	csselr_el1,	x5
+	msr	sctlr_el1,	x6
+	msr	actlr_el1,	x7
+	msr	cpacr_el1,	x8
+	msr	ttbr0_el1,	x9
+	msr	ttbr1_el1,	x10
+	msr	tcr_el1,	x11
+	msr	esr_el1,	x12
+	msr	afsr0_el1,	x13
+	msr	afsr1_el1,	x14
+	msr	far_el1,	x15
+	msr	mair_el1,	x16
+	msr	vbar_el1,	x17
+	msr	contextidr_el1,	x18
+	msr	tpidr_el0,	x19
+	msr	tpidrro_el0,	x20
+	msr	tpidr_el1,	x21
+	msr	amair_el1,	x22
+	msr	cntkctl_el1,	x23
+.endm
+
+.macro activate_traps
+	ldr	x2, [x0, #VCPU_IRQ_LINES]
+	ldr	x1, [x0, #VCPU_HCR_EL2]
+	orr	x2, x2, x1
+	msr	hcr_el2, x2
+
+	ldr	x2, =(CPTR_EL2_TTA)
+	msr	cptr_el2, x2
+
+	ldr	x2, =(1 << 15)	// Trap CP15 Cr=15
+	msr	hstr_el2, x2
+
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+	msr	mdcr_el2, x2
+.endm
+
+.macro deactivate_traps
+	mov	x2, #HCR_RW
+	msr	hcr_el2, x2
+	msr	cptr_el2, xzr
+	msr	hstr_el2, xzr
+
+	mrs	x2, mdcr_el2
+	and	x2, x2, #MDCR_EL2_HPMN_MASK
+	msr	mdcr_el2, x2
+.endm
+
+.macro activate_vm
+	ldr	x1, [x0, #VCPU_KVM]
+	kern_hyp_va	x1
+	ldr	x2, [x1, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+.endm
+
+.macro deactivate_vm
+	msr	vttbr_el2, xzr
+.endm
+
+__save_sysregs:
+	save_sysregs
+	ret
+
+__restore_sysregs:
+	restore_sysregs
+	ret
+
+__save_fpsimd:
+	save_fpsimd
+	ret
+
+__restore_fpsimd:
+	restore_fpsimd
+	ret
+
+/*
+ * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+ *
+ * This is the world switch. The first half of the function
+ * deals with entering the guest, and anything from __kvm_vcpu_return
+ * to the end of the function deals with reentering the host.
+ * On the enter path, only x0 (vcpu pointer) must be preserved until
+ * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
+ * code) must both be preserved until the epilogue.
+ * In both cases, x2 points to the CPU context we're saving/restoring from/to.
+ */
+ENTRY(__kvm_vcpu_run)
+	kern_hyp_va	x0
+	msr	tpidr_el2, x0	// Save the vcpu register
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	save_host_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+
+	activate_traps
+	activate_vm
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	bl __restore_sysregs
+	bl __restore_fpsimd
+	restore_guest_regs
+
+	// That's it, no more messing around.
+	eret
+
+__kvm_vcpu_return:
+	// Assume x0 is the vcpu pointer, x1 the return code
+	// Guest's x0-x3 are on the stack
+
+	// Guest context
+	add	x2, x0, #VCPU_CONTEXT
+
+	save_guest_regs
+	bl __save_fpsimd
+	bl __save_sysregs
+
+	deactivate_traps
+	deactivate_vm
+
+	// Host context
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+	bl __restore_fpsimd
+	restore_host_regs
+
+	mov	x0, x1
+	ret
+END(__kvm_vcpu_run)
+
+// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+ENTRY(__kvm_tlb_flush_vmid_ipa)
+	kern_hyp_va	x0
+	ldr	x2, [x0, #KVM_VTTBR]
+	msr	vttbr_el2, x2
+	isb
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
+	tlbi	ipas2e1is, x1
+	dsb	sy
+	tlbi	vmalle1is
+	dsb	sy
+	isb
+
+	msr	vttbr_el2, xzr
+	ret
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
+
+ENTRY(__kvm_flush_vm_context)
+	tlbi	alle1is
+	ic	ialluis
+	dsb	sy
+	ret
+ENDPROC(__kvm_flush_vm_context)
+
+__kvm_hyp_panic:
+	// Guess the context by looking at VTTBR:
+	// If zero, then we're already a host.
+	// Otherwise restore a minimal host context before panicing.
+	mrs	x0, vttbr_el2
+	cbz	x0, 1f
+
+	mrs	x0, tpidr_el2
+
+	deactivate_traps
+	deactivate_vm
+
+	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x2
+
+	bl __restore_sysregs
+
+1:	adr	x0, __hyp_panic_str
+	adr	x1, 2f
+	ldp	x2, x3, [x1]
+	sub	x0, x0, x2
+	add	x0, x0, x3
+	mrs	x1, spsr_el2
+	mrs	x2, elr_el2
+	mrs	x3, esr_el2
+	mrs	x4, far_el2
+	mrs	x5, hpfar_el2
+	mrs	x6, par_el1
+	mrs	x7, tpidr_el2
+
+	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, lr
+	ldr	lr, =panic
+	msr	elr_el2, lr
+	eret
+
+	.align	3
+2:	.quad	HYP_PAGE_OFFSET
+	.quad	PAGE_OFFSET
+ENDPROC(__kvm_hyp_panic)
+
+__hyp_panic_str:
+	.ascii	"HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+
+	.align	2
+
+ENTRY(kvm_call_hyp)
+	hvc	#0
+	ret
+ENDPROC(kvm_call_hyp)
+
+.macro invalid_vector	label, target
+	.align	2
+\label:
+	b \target
+ENDPROC(\label)
+.endm
+
+	/* None of these should ever happen */
+	invalid_vector	el2t_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2t_error_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el2h_error_invalid, __kvm_hyp_panic
+	invalid_vector	el1_sync_invalid, __kvm_hyp_panic
+	invalid_vector	el1_irq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_fiq_invalid, __kvm_hyp_panic
+	invalid_vector	el1_error_invalid, __kvm_hyp_panic
+
+el1_sync:					// Guest trapped into EL2
+	push	x0, x1
+	push	x2, x3
+
+	mrs	x1, esr_el2
+	lsr	x2, x1, #ESR_EL2_EC_SHIFT
+
+	cmp	x2, #ESR_EL2_EC_HVC64
+	b.ne	el1_trap
+
+	mrs	x3, vttbr_el2			// If vttbr is valid, the 64bit guest
+	cbnz	x3, el1_trap			// called HVC
+
+	/* Here, we're pretty sure the host called HVC. */
+	pop	x2, x3
+	pop	x0, x1
+
+	push	lr, xzr
+
+	/*
+	 * Compute the function address in EL2, and shuffle the parameters.
+	 */
+	kern_hyp_va	x0
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+
+	pop	lr, xzr
+	eret
+
+el1_trap:
+	/*
+	 * x1: ESR
+	 * x2: ESR_EC
+	 */
+	cmp	x2, #ESR_EL2_EC_DABT
+	mov	x0, #ESR_EL2_EC_IABT
+	ccmp	x2, x0, #4, ne
+	b.ne	1f		// Not an abort we care about
+
+	/* This is an abort. Check for permission fault */
+	and	x2, x1, #ESR_EL2_FSC_TYPE
+	cmp	x2, #FSC_PERM
+	b.ne	1f		// Not a permission fault
+
+	/*
+	 * Check for Stage-1 page table walk, which is guaranteed
+	 * to give a valid HPFAR_EL2.
+	 */
+	tbnz	x1, #7, 1f	// S1PTW is set
+
+	/*
+	 * Permission fault, HPFAR_EL2 is invalid.
+	 * Resolve the IPA the hard way using the guest VA.
+	 * Stage-1 translation already validated the memory access rights.
+	 * As such, we can use the EL1 translation regime, and don't have
+	 * to distinguish between EL0 and EL1 access.
+	 */
+	mrs	x2, far_el2
+	at	s1e1r, x2
+	isb
+
+	/* Read result */
+	mrs	x3, par_el1
+	tbnz	x3, #0, 3f		// Bail out if we failed the translation
+	ubfx	x3, x3, #12, #36	// Extract IPA
+	lsl	x3, x3, #4		// and present it like HPFAR
+	b	2f
+
+1:	mrs	x3, hpfar_el2
+	mrs	x2, far_el2
+
+2:	mrs	x0, tpidr_el2
+	str	x1, [x0, #VCPU_ESR_EL2]
+	str	x2, [x0, #VCPU_FAR_EL2]
+	str	x3, [x0, #VCPU_HPFAR_EL2]
+
+	mov	x1, #ARM_EXCEPTION_TRAP
+	b	__kvm_vcpu_return
+
+	/*
+	 * Translation failed. Just return to the guest and
+	 * let it fault again. Another CPU is probably playing
+	 * behind our back.
+	 */
+3:	pop	x2, x3
+	pop	x0, x1
+
+	eret
+
+el1_irq:
+	push	x0, x1
+	push	x2, x3
+	mrs	x0, tpidr_el2
+	mov	x1, #ARM_EXCEPTION_IRQ
+	b	__kvm_vcpu_return
+
+	.ltorg
+
+	.align 11
+
+ENTRY(__kvm_hyp_vector)
+	ventry	el2t_sync_invalid		// Synchronous EL2t
+	ventry	el2t_irq_invalid		// IRQ EL2t
+	ventry	el2t_fiq_invalid		// FIQ EL2t
+	ventry	el2t_error_invalid		// Error EL2t
+
+	ventry	el2h_sync_invalid		// Synchronous EL2h
+	ventry	el2h_irq_invalid		// IRQ EL2h
+	ventry	el2h_fiq_invalid		// FIQ EL2h
+	ventry	el2h_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq				// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync			// Synchronous 32-bit EL1
+	ventry	el1_irq				// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
+
+__kvm_hyp_code_end:
+	.globl	__kvm_hyp_code_end
+
+	.popsection

From b0353aa3722552c9058737452c9bec86d73ccdad Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:40:41 +0000
Subject: [PATCH 020/287] arm64: KVM: Exit handling

Handle the exit of a VM, decoding the exit reason from HYP mode
and calling the corresponding handler.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit c4b1afd022e93eada6ee4b209be37101cd4b3494)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 119 +++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 arch/arm64/kvm/handle_exit.c

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
new file mode 100644
index 000000000000..c65d1154f969
--- /dev/null
+++ b/arch/arm64/kvm/handle_exit.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/handle_exit.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/*
+	 * Guest called HVC instruction:
+	 * Let it know we don't want that by injecting an undefined exception.
+	 */
+	kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1),
+		  *vcpu_pc(vcpu));
+	kvm_debug("         HSR: %8x", kvm_vcpu_get_hsr(vcpu));
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	/* We don't support SMC; don't do that. */
+	kvm_debug("smc: at %08lx", *vcpu_pc(vcpu));
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu:	the vcpu pointer
+ *
+ * Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_vcpu_block(vcpu);
+	return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+	[ESR_EL2_EC_WFI]	= kvm_handle_wfi,
+	[ESR_EL2_EC_HVC64]	= handle_hvc,
+	[ESR_EL2_EC_SMC64]	= handle_smc,
+	[ESR_EL2_EC_SYS64]	= kvm_handle_sys_reg,
+	[ESR_EL2_EC_IABT]	= kvm_handle_guest_abort,
+	[ESR_EL2_EC_DABT]	= kvm_handle_guest_abort,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+	    !arm_exit_handlers[hsr_ec]) {
+		kvm_err("Unkown exception class: hsr: %#08x\n",
+			(unsigned int)kvm_vcpu_get_hsr(vcpu));
+		BUG();
+	}
+
+	return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+		       int exception_index)
+{
+	exit_handle_fn exit_handler;
+
+	switch (exception_index) {
+	case ARM_EXCEPTION_IRQ:
+		return 1;
+	case ARM_EXCEPTION_TRAP:
+		/*
+		 * See ARM ARM B1.14.1: "Hyp traps on instructions
+		 * that fail their condition code check"
+		 */
+		if (!kvm_condition_valid(vcpu)) {
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return 1;
+		}
+
+		exit_handler = kvm_get_exit_handler(vcpu);
+
+		return exit_handler(vcpu, run);
+	default:
+		kvm_pr_unimpl("Unsupported exception type: %d",
+			      exception_index);
+		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		return 0;
+	}
+}

From 4f0c6d89a863d5a86d2dea50a9e114559e73d97f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Dec 2012 17:54:54 +0000
Subject: [PATCH 021/287] arm64: KVM: Plug the VGIC

Add support for the in-kernel GIC emulation.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1f17f3b6044d8a81a74dc6c962b3b38a7336106b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 88 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 0b18c2e1e043..8dc27a367d77 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -306,6 +306,90 @@ __kvm_hyp_code_start:
 	msr	vttbr_el2, xzr
 .endm
 
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro save_vgic_state
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* Save all interesting registers */
+	ldr	w4, [x2, #GICH_HCR]
+	ldr	w5, [x2, #GICH_VMCR]
+	ldr	w6, [x2, #GICH_MISR]
+	ldr	w7, [x2, #GICH_EISR0]
+	ldr	w8, [x2, #GICH_EISR1]
+	ldr	w9, [x2, #GICH_ELRSR0]
+	ldr	w10, [x2, #GICH_ELRSR1]
+	ldr	w11, [x2, #GICH_APR]
+
+	str	w4, [x3, #VGIC_CPU_HCR]
+	str	w5, [x3, #VGIC_CPU_VMCR]
+	str	w6, [x3, #VGIC_CPU_MISR]
+	str	w7, [x3, #VGIC_CPU_EISR]
+	str	w8, [x3, #(VGIC_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_CPU_APR]
+
+	/* Clear GICH_HCR */
+	str	wzr, [x2, #GICH_HCR]
+
+	/* Save list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_CPU_LR
+1:	ldr	w5, [x2], #4
+	str	w5, [x3], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro restore_vgic_state
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* We only restore a minimal set of registers */
+	ldr	w4, [x3, #VGIC_CPU_HCR]
+	ldr	w5, [x3, #VGIC_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_CPU_APR]
+
+	str	w4, [x2, #GICH_HCR]
+	str	w5, [x2, #GICH_VMCR]
+	str	w6, [x2, #GICH_APR]
+
+	/* Restore list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_CPU_LR
+1:	ldr	w5, [x3], #4
+	str	w5, [x2], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+.endm
+
 __save_sysregs:
 	save_sysregs
 	ret
@@ -348,6 +432,8 @@ ENTRY(__kvm_vcpu_run)
 	activate_traps
 	activate_vm
 
+	restore_vgic_state
+
 	// Guest context
 	add	x2, x0, #VCPU_CONTEXT
 
@@ -369,6 +455,8 @@ __kvm_vcpu_return:
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	save_vgic_state
+
 	deactivate_traps
 	deactivate_vm
 

From 615fd459aa42eb0450667fb191c1931a1da749b3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 30 May 2013 18:31:28 +0100
Subject: [PATCH 022/287] ARM: KVM: timer: allow DT matching for ARMv8 cores

ARMv8 cores have the exact same timer as ARMv7 cores. Make sure the
KVM timer code can match it in the device tree.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f61701e0a24a09aa4a44baf24e57dcc5e706afa8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 2d00b2925780..6f485eaf643b 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -195,6 +195,7 @@ static struct notifier_block kvm_timer_cpu_nb = {
 
 static const struct of_device_id arch_timer_of_match[] = {
 	{ .compatible	= "arm,armv7-timer",	},
+	{ .compatible	= "arm,armv8-timer",	},
 	{},
 };
 

From 5b12bf6aa91a284eb32fdb5f02ebc770149dbf50 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Dec 2012 17:52:03 +0000
Subject: [PATCH 023/287] arm64: KVM: Plug the arch timer

Add support for the in-kernel timer emulation.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 003300de6c3e51934fb52eb2677f6f4fb4996cbd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S   | 56 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/reset.c | 12 +++++++++
 2 files changed, 68 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 8dc27a367d77..8b510835b440 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -390,6 +390,60 @@ __kvm_hyp_code_start:
 2:
 .endm
 
+.macro save_timer_state
+	// x0: vcpu pointer
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	mrs	x3, cntv_ctl_el0
+	and	x3, x3, #3
+	str	w3, [x0, #VCPU_TIMER_CNTV_CTL]
+	bic	x3, x3, #1		// Clear Enable
+	msr	cntv_ctl_el0, x3
+
+	isb
+
+	mrs	x3, cntv_cval_el0
+	str	x3, [x0, #VCPU_TIMER_CNTV_CVAL]
+
+1:
+	// Allow physical timer/counter access for the host
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #3
+	msr	cnthctl_el2, x2
+
+	// Clear cntvoff for the host
+	msr	cntvoff_el2, xzr
+.endm
+
+.macro restore_timer_state
+	// x0: vcpu pointer
+	// Disallow physical timer access for the guest
+	// Physical counter access is allowed
+	mrs	x2, cnthctl_el2
+	orr	x2, x2, #1
+	bic	x2, x2, #2
+	msr	cnthctl_el2, x2
+
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va x2
+	ldr	w3, [x2, #KVM_TIMER_ENABLED]
+	cbz	w3, 1f
+
+	ldr	x3, [x2, #KVM_TIMER_CNTVOFF]
+	msr	cntvoff_el2, x3
+	ldr	x2, [x0, #VCPU_TIMER_CNTV_CVAL]
+	msr	cntv_cval_el0, x2
+	isb
+
+	ldr	w2, [x0, #VCPU_TIMER_CNTV_CTL]
+	and	x2, x2, #3
+	msr	cntv_ctl_el0, x2
+1:
+.endm
+
 __save_sysregs:
 	save_sysregs
 	ret
@@ -433,6 +487,7 @@ ENTRY(__kvm_vcpu_run)
 	activate_vm
 
 	restore_vgic_state
+	restore_timer_state
 
 	// Guest context
 	add	x2, x0, #VCPU_CONTEXT
@@ -455,6 +510,7 @@ __kvm_vcpu_return:
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	save_timer_state
 	save_vgic_state
 
 	deactivate_traps
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index f6536a06231a..766150ac76ed 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -23,6 +23,8 @@
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
 
+#include <kvm/arm_arch_timer.h>
+
 #include <asm/cputype.h>
 #include <asm/ptrace.h>
 #include <asm/kvm_arm.h>
@@ -36,6 +38,11 @@ static const struct kvm_regs default_regs_reset = {
 			PSR_F_BIT | PSR_D_BIT),
 };
 
+static const struct kvm_irq_level default_vtimer_irq = {
+	.irq	= 27,
+	.level	= 1,
+};
+
 int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	int r;
@@ -58,11 +65,13 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
+	const struct kvm_irq_level *cpu_vtimer_irq;
 	const struct kvm_regs *cpu_reset;
 
 	switch (vcpu->arch.target) {
 	default:
 		cpu_reset = &default_regs_reset;
+		cpu_vtimer_irq = &default_vtimer_irq;
 		break;
 	}
 
@@ -72,5 +81,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset system registers */
 	kvm_reset_sys_regs(vcpu);
 
+	/* Reset timer */
+	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
 	return 0;
 }

From 5732aca89a79f4f9446145c8d921963c81b4157f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 12 Dec 2012 18:52:05 +0000
Subject: [PATCH 024/287] arm64: KVM: PSCI implementation

Wire the PSCI backend into the exit handling code.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit dcd2e40c1e1cce302498d16d095b0f8a30326f74)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  2 +-
 arch/arm64/include/asm/kvm_psci.h | 23 +++++++++++++++++++++++
 arch/arm64/include/uapi/asm/kvm.h | 16 ++++++++++++++++
 arch/arm64/kvm/handle_exit.c      | 16 +++++++---------
 4 files changed, 47 insertions(+), 10 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_psci.h

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2500eb6a4d2a..2fdeb326c3ee 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
-#define KVM_VCPU_MAX_FEATURES 0
+#define KVM_VCPU_MAX_FEATURES 1
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
new file mode 100644
index 000000000000..e301a4816355
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_PSCI_H__
+#define __ARM64_KVM_PSCI_H__
+
+bool kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ebac919dc0ca..fb60f9037057 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -69,6 +69,8 @@ struct kvm_regs {
 #define KVM_VGIC_V2_DIST_SIZE		0x1000
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
+#define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+
 struct kvm_vcpu_init {
 	__u32 target;
 	__u32 features[7];
@@ -141,6 +143,20 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX		127
 
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE		0x95c1ba5e
+#define KVM_PSCI_FN(n)			(KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND		KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF		KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS		0
+#define KVM_PSCI_RET_NI			((unsigned long)-1)
+#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
+#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+
 #endif
 
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index c65d1154f969..4766b7f3515e 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -24,26 +24,24 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
 
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	/*
-	 * Guest called HVC instruction:
-	 * Let it know we don't want that by injecting an undefined exception.
-	 */
-	kvm_debug("hvc: %x (at %08lx)", kvm_vcpu_get_hsr(vcpu) & ((1 << 16) - 1),
-		  *vcpu_pc(vcpu));
-	kvm_debug("         HSR: %8x", kvm_vcpu_get_hsr(vcpu));
+	if (kvm_psci_call(vcpu))
+		return 1;
+
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	/* We don't support SMC; don't do that. */
-	kvm_debug("smc: at %08lx", *vcpu_pc(vcpu));
+	if (kvm_psci_call(vcpu))
+		return 1;
+
 	kvm_inject_undefined(vcpu);
 	return 1;
 }

From ea4ebae1594250431731dcdacd6da33b865fd3b1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 10 Dec 2012 16:41:44 +0000
Subject: [PATCH 025/287] arm64: KVM: Build system integration

Only the Makefile is plugged in. The Kconfig stuff is in a separate
patch to allow for an easier merge process.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6211753fdfd05af9e08f54c8d0ba3ee516034878)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/Makefile     |  1 +
 arch/arm64/kvm/Makefile | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 arch/arm64/kvm/Makefile

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b6ccf8a36e2d..7ab6b358cc35 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -37,6 +37,7 @@ TEXT_OFFSET := 0x00080000
 export	TEXT_OFFSET GZFLAGS
 
 core-y		+= arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_KVM) += arch/arm64/kvm/
 libs-y		:= arch/arm64/lib/ $(libs-y)
 libs-y		+= $(LIBGCC)
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 000000000000..dca110556683
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,23 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+KVM=../../../virt/kvm
+ARM=../../../arch/arm/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o
+kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o

From 8ee55043e2fc5e6c90209f478c70b600dce523a9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 19:17:50 +0000
Subject: [PATCH 026/287] arm64: KVM: define 32bit specific registers

Define the 32bit specific registers (SPSRs, cp15...).

Most CPU registers are directly mapped to a 64bit register
(r0->x0...). Only the SPSRs have separate registers.

cp15 registers are also mapped into their 64bit counterpart in most
cases.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 40033a614ea3db196d57c477ca328f44eb1e4df0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h  | 38 ++++++++++++++++++++++++++++++-
 arch/arm64/include/asm/kvm_host.h |  5 +++-
 arch/arm64/include/uapi/asm/kvm.h |  7 +++++-
 3 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 591ac219964a..c92de4163eba 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,7 +42,43 @@
 #define	TPIDR_EL1	18	/* Thread ID, Privileged */
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
-#define	NR_SYS_REGS	21
+/* 32bit specific registers. Keep them at the end of the range */
+#define	DACR32_EL2	21	/* Domain Access Control Register */
+#define	IFSR32_EL2	22	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	23	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	24	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	25	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	26	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	27
+
+/* 32bit mapping */
+#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
+#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
+#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
+#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
+#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
+#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
+#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
+#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
+#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
+#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
+#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
+#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
+#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
+#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
+#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
+#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
+#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
+#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
+#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
+#define c10_AMAIR	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+#define NR_CP15_REGS	(NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2fdeb326c3ee..3f5830b3ca3f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -84,7 +84,10 @@ struct kvm_vcpu_fault_info {
 
 struct kvm_cpu_context {
 	struct kvm_regs	gp_regs;
-	u64 sys_regs[NR_SYS_REGS];
+	union {
+		u64 sys_regs[NR_SYS_REGS];
+		u32 cp15[NR_CP15_REGS];
+	};
 };
 
 typedef struct kvm_cpu_context kvm_cpu_context_t;
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index fb60f9037057..5b1110c49df5 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -23,7 +23,12 @@
 #define __ARM_KVM_H__
 
 #define KVM_SPSR_EL1	0
-#define KVM_NR_SPSR	1
+#define KVM_SPSR_SVC	KVM_SPSR_EL1
+#define KVM_SPSR_ABT	1
+#define KVM_SPSR_UND	2
+#define KVM_SPSR_IRQ	3
+#define KVM_SPSR_FIQ	4
+#define KVM_NR_SPSR	5
 
 #ifndef __ASSEMBLY__
 #include <asm/types.h>

From 934f190b989d879c8ef59e26c9d6eb2d10632f0a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 19:40:29 +0000
Subject: [PATCH 027/287] arm64: KVM: 32bit GP register access

Allow access to the 32bit register file through the usual API.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b547631fc64e249a3c507e6ce854642507fa7c1c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h |  15 ++-
 arch/arm64/kvm/Makefile              |   2 +-
 arch/arm64/kvm/regmap.c              | 168 +++++++++++++++++++++++++++
 3 files changed, 183 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kvm/regmap.c

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6c1725e93b0b..20a1a3931d8d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -28,6 +28,9 @@
 #include <asm/kvm_mmio.h>
 #include <asm/ptrace.h>
 
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -49,7 +52,7 @@ static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
 
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 {
-	return false;	/* 32bit? Bahhh... */
+	return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
 }
 
 static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
@@ -64,16 +67,23 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
 {
+	*vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
 }
 
 static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
 {
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_reg32(vcpu, reg_num);
+
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
 }
 
 /* Get vcpu SPSR for current mode */
 static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
 {
+	if (vcpu_mode_is_32bit(vcpu))
+		return vcpu_spsr32(vcpu);
+
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
 }
 
@@ -81,6 +91,9 @@ static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
 {
 	u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
 
+	if (vcpu_mode_is_32bit(vcpu))
+		return mode > COMPAT_PSR_MODE_USR;
+
 	return mode != PSR_MODE_EL0t;
 }
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index dca110556683..a2169ec8d93b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 000000000000..bbc6ae32e4af
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+#define VCPU_NR_MODES 6
+#define REG_OFFSET(_reg) \
+	(offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+	/* USR Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12), USR_REG_OFFSET(13),	USR_REG_OFFSET(14),
+		REG_OFFSET(pc)
+	},
+
+	/* FIQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+		REG_OFFSET(compat_r8_fiq),  /* r8 */
+		REG_OFFSET(compat_r9_fiq),  /* r9 */
+		REG_OFFSET(compat_r10_fiq), /* r10 */
+		REG_OFFSET(compat_r11_fiq), /* r11 */
+		REG_OFFSET(compat_r12_fiq), /* r12 */
+		REG_OFFSET(compat_sp_fiq),  /* r13 */
+		REG_OFFSET(compat_lr_fiq),  /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* IRQ Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_irq), /* r13 */
+		REG_OFFSET(compat_lr_irq), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* SVC Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_svc), /* r13 */
+		REG_OFFSET(compat_lr_svc), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* ABT Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_abt), /* r13 */
+		REG_OFFSET(compat_lr_abt), /* r14 */
+		REG_OFFSET(pc)
+	},
+
+	/* UND Registers */
+	{
+		USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+		USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+		USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+		USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+		USR_REG_OFFSET(12),
+		REG_OFFSET(compat_sp_und), /* r13 */
+		REG_OFFSET(compat_lr_und), /* r14 */
+		REG_OFFSET(pc)
+	},
+};
+
+/*
+ * Return a pointer to the register number valid in the current mode of
+ * the virtual CPU.
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+	unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+
+	switch (mode) {
+	case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
+		mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
+		break;
+
+	case COMPAT_PSR_MODE_ABT:
+		mode = 4;
+		break;
+
+	case COMPAT_PSR_MODE_UND:
+		mode = 5;
+		break;
+
+	case COMPAT_PSR_MODE_SYS:
+		mode = 0;	/* SYS maps to USR */
+		break;
+
+	default:
+		BUG();
+	}
+
+	return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
+
+/*
+ * Return the SPSR for the current mode of the virtual CPU.
+ */
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+	switch (mode) {
+	case COMPAT_PSR_MODE_SVC:
+		mode = KVM_SPSR_SVC;
+		break;
+	case COMPAT_PSR_MODE_ABT:
+		mode = KVM_SPSR_ABT;
+		break;
+	case COMPAT_PSR_MODE_UND:
+		mode = KVM_SPSR_UND;
+		break;
+	case COMPAT_PSR_MODE_IRQ:
+		mode = KVM_SPSR_IRQ;
+		break;
+	case COMPAT_PSR_MODE_FIQ:
+		mode = KVM_SPSR_FIQ;
+		break;
+	default:
+		BUG();
+	}
+
+	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+}

From 4129306976df31cf80f13a85d6886e2f1245662b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 19:54:04 +0000
Subject: [PATCH 028/287] arm64: KVM: 32bit conditional execution emulation

As conditional instructions can trap on AArch32, add the thinest
possible emulation layer to keep 32bit guests happy.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 27b190bd9fbfee34536cb858f0b5924d294aac38)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h |  13 ++-
 arch/arm64/kvm/Makefile              |   2 +-
 arch/arm64/kvm/emulate.c             | 158 +++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/kvm/emulate.c

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 20a1a3931d8d..eec073875218 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -31,6 +31,9 @@
 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
 unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
 
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+
 void kvm_inject_undefined(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
@@ -57,12 +60,18 @@ static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 
 static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
 {
-	return true;	/* No conditionals on arm64 */
+	if (vcpu_mode_is_32bit(vcpu))
+		return kvm_condition_valid32(vcpu);
+
+	return true;
 }
 
 static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
 {
-	*vcpu_pc(vcpu) += 4;
+	if (vcpu_mode_is_32bit(vcpu))
+		kvm_skip_instr32(vcpu, is_wide_instr);
+	else
+		*vcpu_pc(vcpu) += 4;
 }
 
 static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index a2169ec8d93b..72a9fd583ad3 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -15,7 +15,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c
new file mode 100644
index 000000000000..124418d17049
--- /dev/null
+++ b/arch/arm64/kvm/emulate.c
@@ -0,0 +1,158 @@
+/*
+ * (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+	0xF0F0,			/* EQ == Z set            */
+	0x0F0F,			/* NE                     */
+	0xCCCC,			/* CS == C set            */
+	0x3333,			/* CC                     */
+	0xFF00,			/* MI == N set            */
+	0x00FF,			/* PL                     */
+	0xAAAA,			/* VS == V set            */
+	0x5555,			/* VC                     */
+	0x0C0C,			/* HI == C set && Z clear */
+	0xF3F3,			/* LS == C clear || Z set */
+	0xAA55,			/* GE == (N==V)           */
+	0x55AA,			/* LT == (N!=V)           */
+	0x0A05,			/* GT == (!Z && (N==V))   */
+	0xF5FA,			/* LE == (Z || (N!=V))    */
+	0xFFFF,			/* AL always              */
+	0			/* NV                     */
+};
+
+static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+{
+	u32 esr = kvm_vcpu_get_hsr(vcpu);
+
+	if (esr & ESR_EL2_CV)
+		return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT;
+
+	return -1;
+}
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+{
+	unsigned long cpsr;
+	u32 cpsr_cond;
+	int cond;
+
+	/* Top two bits non-zero?  Unconditional. */
+	if (kvm_vcpu_get_hsr(vcpu) >> 30)
+		return true;
+
+	/* Is condition field valid? */
+	cond = kvm_vcpu_get_condition(vcpu);
+	if (cond == 0xE)
+		return true;
+
+	cpsr = *vcpu_cpsr(vcpu);
+
+	if (cond < 0) {
+		/* This can happen in Thumb mode: examine IT state. */
+		unsigned long it;
+
+		it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+		/* it == 0 => unconditional. */
+		if (it == 0)
+			return true;
+
+		/* The cond for this insn works out as the top 4 bits. */
+		cond = (it >> 4);
+	}
+
+	cpsr_cond = cpsr >> 28;
+
+	if (!((cc_map[cond] >> cpsr_cond) & 1))
+		return false;
+
+	return true;
+}
+
+/**
+ * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu:	The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+	unsigned long itbits, cond;
+	unsigned long cpsr = *vcpu_cpsr(vcpu);
+	bool is_arm = !(cpsr & COMPAT_PSR_T_BIT);
+
+	BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK));
+
+	if (!(cpsr & COMPAT_PSR_IT_MASK))
+		return;
+
+	cond = (cpsr & 0xe000) >> 13;
+	itbits = (cpsr & 0x1c00) >> (10 - 2);
+	itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+	/* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
+	if ((itbits & 0x7) == 0)
+		itbits = cond = 0;
+	else
+		itbits = (itbits << 1) & 0x1f;
+
+	cpsr &= ~COMPAT_PSR_IT_MASK;
+	cpsr |= cond << 13;
+	cpsr |= (itbits & 0x1c) << (10 - 2);
+	cpsr |= (itbits & 0x3) << 25;
+	*vcpu_cpsr(vcpu) = cpsr;
+}
+
+/**
+ * kvm_skip_instr - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+	bool is_thumb;
+
+	is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT);
+	if (is_thumb && !is_wide_instr)
+		*vcpu_pc(vcpu) += 2;
+	else
+		*vcpu_pc(vcpu) += 4;
+	kvm_adjust_itstate(vcpu);
+}

From 33056d384f68642d62ba6dc6fe0a17ef92473135 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:32:33 +0000
Subject: [PATCH 029/287] arm64: KVM: 32bit handling of coprocessor traps

Provide the necessary infrastructure to trap coprocessor accesses that
occur when running 32bit guests.

Also wire SMC and HVC trapped in 32bit mode while were at it.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 62a89c44954f09072bf07a714c8f68bda14ab87e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_coproc.h |   5 +
 arch/arm64/kvm/handle_exit.c        |   7 ++
 arch/arm64/kvm/sys_regs.c           | 181 ++++++++++++++++++++++++++--
 3 files changed, 186 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
index 9b4477acb554..9a59301cd014 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -32,11 +32,16 @@ struct kvm_sys_reg_table {
 
 struct kvm_sys_reg_target_table {
 	struct kvm_sys_reg_table table64;
+	struct kvm_sys_reg_table table32;
 };
 
 void kvm_register_target_sys_reg_table(unsigned int target,
 				       struct kvm_sys_reg_target_table *table);
 
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #define kvm_coproc_table_init kvm_sys_reg_table_init
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 4766b7f3515e..9beaca033437 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -62,6 +62,13 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_WFI]	= kvm_handle_wfi,
+	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
+	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
+	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_LS]	= kvm_handle_cp14_load_store,
+	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_HVC32]	= handle_hvc,
+	[ESR_EL2_EC_SMC32]	= handle_smc,
 	[ESR_EL2_EC_HVC64]	= handle_hvc,
 	[ESR_EL2_EC_SMC64]	= handle_smc,
 	[ESR_EL2_EC_SYS64]	= kvm_handle_sys_reg,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 52fff0ae3442..94923609753b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -38,6 +38,10 @@
  * types are different. My gut feeling is that it should be pretty
  * easy to merge, but that would be an ABI breakage -- again. VFP
  * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
  */
 
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
@@ -166,6 +170,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
 	  access_dcsw },
 
+	/* TEECR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEECR32_EL1, 0 },
+	/* TEEHBR32_EL1 */
+	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  NULL, reset_val, TEEHBR32_EL1, 0 },
+	/* DBGVCR32_EL2 */
+	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
+	  NULL, reset_val, DBGVCR32_EL2, 0 },
+
 	/* MPIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
 	  NULL, reset_mpidr, MPIDR_EL1 },
@@ -276,6 +290,39 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	/* TPIDRRO_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
 	  NULL, reset_unknown, TPIDRRO_EL0 },
+
+	/* DACR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
+	  NULL, reset_unknown, DACR32_EL2 },
+	/* IFSR32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
+	  NULL, reset_unknown, IFSR32_EL2 },
+	/* FPEXC32_EL2 */
+	{ Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
+	  NULL, reset_val, FPEXC32_EL2, 0x70 },
+};
+
+/* Trapped cp15 registers */
+static const struct sys_reg_desc cp15_regs[] = {
+	/*
+	 * DC{C,I,CI}SW operations:
+	 */
+	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
 };
 
 /* Target specific emulation tables */
@@ -288,13 +335,20 @@ void kvm_register_target_sys_reg_table(unsigned int target,
 }
 
 /* Get specific register table for this target. */
-static const struct sys_reg_desc *get_target_table(unsigned target, size_t *num)
+static const struct sys_reg_desc *get_target_table(unsigned target,
+						   bool mode_is_64,
+						   size_t *num)
 {
 	struct kvm_sys_reg_target_table *table;
 
 	table = target_tables[target];
-	*num = table->table64.num;
-	return table->table64.table;
+	if (mode_is_64) {
+		*num = table->table64.num;
+		return table->table64.table;
+	} else {
+		*num = table->table32.num;
+		return table->table32.table;
+	}
 }
 
 static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
@@ -322,13 +376,126 @@ static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
 	return NULL;
 }
 
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	kvm_inject_undefined(vcpu);
+	return 1;
+}
+
+static void emulate_cp15(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *params)
+{
+	size_t num;
+	const struct sys_reg_desc *table, *r;
+
+	table = get_target_table(vcpu->arch.target, false, &num);
+
+	/* Search target-specific then generic table. */
+	r = find_reg(params, table, num);
+	if (!r)
+		r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
+
+	if (likely(r)) {
+		/*
+		 * Not having an accessor means that we have
+		 * configured a trap that we don't know how to
+		 * handle. This certainly qualifies as a gross bug
+		 * that should be fixed right away.
+		 */
+		BUG_ON(!r->access);
+
+		if (likely(r->access(vcpu, params, r))) {
+			/* Skip instruction, since it was emulated */
+			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+			return;
+		}
+		/* If access function fails, it should complain. */
+	}
+
+	kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu));
+	print_sys_reg_instr(params);
+	kvm_inject_undefined(vcpu);
+}
+
+/**
+ * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct sys_reg_params params;
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+	int Rt2 = (hsr >> 10) & 0xf;
+
+	params.CRm = (hsr >> 1) & 0xf;
+	params.Rt = (hsr >> 5) & 0xf;
+	params.is_write = ((hsr & 1) == 0);
+
+	params.Op0 = 0;
+	params.Op1 = (hsr >> 16) & 0xf;
+	params.Op2 = 0;
+	params.CRn = 0;
+
+	/*
+	 * Massive hack here. Store Rt2 in the top 32bits so we only
+	 * have one register to deal with. As we use the same trap
+	 * backends between AArch32 and AArch64, we get away with it.
+	 */
+	if (params.is_write) {
+		u64 val = *vcpu_reg(vcpu, params.Rt);
+		val &= 0xffffffff;
+		val |= *vcpu_reg(vcpu, Rt2) << 32;
+		*vcpu_reg(vcpu, params.Rt) = val;
+	}
+
+	emulate_cp15(vcpu, &params);
+
+	/* Do the opposite hack for the read side */
+	if (!params.is_write) {
+		u64 val = *vcpu_reg(vcpu, params.Rt);
+		val >>= 32;
+		*vcpu_reg(vcpu, Rt2) = val;
+	}
+
+	return 1;
+}
+
+/**
+ * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run:  The kvm_run struct
+ */
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	struct sys_reg_params params;
+	u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
+	params.CRm = (hsr >> 1) & 0xf;
+	params.Rt  = (hsr >> 5) & 0xf;
+	params.is_write = ((hsr & 1) == 0);
+	params.CRn = (hsr >> 10) & 0xf;
+	params.Op0 = 0;
+	params.Op1 = (hsr >> 14) & 0x7;
+	params.Op2 = (hsr >> 17) & 0x7;
+
+	emulate_cp15(vcpu, &params);
+	return 1;
+}
+
 static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 			   const struct sys_reg_params *params)
 {
 	size_t num;
 	const struct sys_reg_desc *table, *r;
 
-	table = get_target_table(vcpu->arch.target, &num);
+	table = get_target_table(vcpu->arch.target, true, &num);
 
 	/* Search target-specific then generic table. */
 	r = find_reg(params, table, num);
@@ -438,7 +605,7 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
 	if (!index_to_params(id, &params))
 		return NULL;
 
-	table = get_target_table(vcpu->arch.target, &num);
+	table = get_target_table(vcpu->arch.target, true, &num);
 	r = find_reg(&params, table, num);
 	if (!r)
 		r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
@@ -762,7 +929,7 @@ static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
 	size_t num;
 
 	/* We check for duplicates here, to allow arch-specific overrides. */
-	i1 = get_target_table(vcpu->arch.target, &num);
+	i1 = get_target_table(vcpu->arch.target, true, &num);
 	end1 = i1 + num;
 	i2 = sys_reg_descs;
 	end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
@@ -874,7 +1041,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
 	/* Generic chip reset first (so target could override). */
 	reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
 
-	table = get_target_table(vcpu->arch.target, &num);
+	table = get_target_table(vcpu->arch.target, true, &num);
 	reset_sys_reg_descs(vcpu, table, num);
 
 	for (num = 1; num < NR_SYS_REGS; num++)

From a6df8b5dd61c84f691c41663a1d2bb0e05342053 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:50:18 +0000
Subject: [PATCH 030/287] arm64: KVM: CPU specific 32bit coprocessor access

Enable handling of CPU specific 32bit coprocessor access. Not much
here either.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 06c7654d2fb8bac7b1af4340ad59434a5d89b86a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs_generic_v8.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index d4e803907312..4268ab9356b1 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -59,11 +59,21 @@ static const struct sys_reg_desc genericv8_sys_regs[] = {
 	  access_actlr, reset_actlr, ACTLR_EL1 },
 };
 
+static const struct sys_reg_desc genericv8_cp15_regs[] = {
+	/* ACTLR */
+	{ Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+	  access_actlr },
+};
+
 static struct kvm_sys_reg_target_table genericv8_target_table = {
 	.table64 = {
 		.table = genericv8_sys_regs,
 		.num = ARRAY_SIZE(genericv8_sys_regs),
 	},
+	.table32 = {
+		.table = genericv8_cp15_regs,
+		.num = ARRAY_SIZE(genericv8_cp15_regs),
+	},
 };
 
 static int __init sys_reg_genericv8_init(void)

From ad0ed2f67ccf23f49c201cb05c542d46e04fb9c1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:52:10 +0000
Subject: [PATCH 031/287] arm64: KVM: 32bit specific register world switch

Allow registers specific to 32bit guests to be saved/restored
during the world switch.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b4afad06c19e3489767532f86ff453a1d1e28b8c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 70 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 8b510835b440..ff985e3d8b72 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -266,6 +266,74 @@ __kvm_hyp_code_start:
 	msr	cntkctl_el1,	x23
 .endm
 
+.macro skip_32bit_state tmp, target
+	// Skip 32bit state if not needed
+	mrs	\tmp, hcr_el2
+	tbnz	\tmp, #HCR_RW_SHIFT, \target
+.endm
+
+.macro skip_tee_state tmp, target
+	// Skip ThumbEE state if not needed
+	mrs	\tmp, id_pfr0_el1
+	tbz	\tmp, #12, \target
+.endm
+
+.macro save_guest_32bit_state
+	skip_32bit_state x3, 1f
+
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	mrs	x4, spsr_abt
+	mrs	x5, spsr_und
+	mrs	x6, spsr_irq
+	mrs	x7, spsr_fiq
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	mrs	x4, dacr32_el2
+	mrs	x5, ifsr32_el2
+	mrs	x6, fpexc32_el2
+	mrs	x7, dbgvcr32_el2
+	stp	x4, x5, [x3]
+	stp	x6, x7, [x3, #16]
+
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	mrs	x4, teecr32_el1
+	mrs	x5, teehbr32_el1
+	stp	x4, x5, [x3]
+1:
+.endm
+
+.macro restore_guest_32bit_state
+	skip_32bit_state x3, 1f
+
+	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	msr	spsr_abt, x4
+	msr	spsr_und, x5
+	msr	spsr_irq, x6
+	msr	spsr_fiq, x7
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+	ldp	x4, x5, [x3]
+	ldp	x6, x7, [x3, #16]
+	msr	dacr32_el2, x4
+	msr	ifsr32_el2, x5
+	msr	fpexc32_el2, x6
+	msr	dbgvcr32_el2, x7
+
+	skip_tee_state x8, 1f
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+	ldp	x4, x5, [x3]
+	msr	teecr32_el1, x4
+	msr	teehbr32_el1, x5
+1:
+.endm
+
 .macro activate_traps
 	ldr	x2, [x0, #VCPU_IRQ_LINES]
 	ldr	x1, [x0, #VCPU_HCR_EL2]
@@ -494,6 +562,7 @@ ENTRY(__kvm_vcpu_run)
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+	restore_guest_32bit_state
 	restore_guest_regs
 
 	// That's it, no more messing around.
@@ -509,6 +578,7 @@ __kvm_vcpu_return:
 	save_guest_regs
 	bl __save_fpsimd
 	bl __save_sysregs
+	save_guest_32bit_state
 
 	save_timer_state
 	save_vgic_state

From 31960c92ba1763c3c5db1d09fb0f4547a39c8d4d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Feb 2013 11:29:35 +0000
Subject: [PATCH 032/287] arm64: KVM: 32bit guest fault injection

Add fault injection capability for 32bit guests.

Reviewed-by: Christopher Covington <cov@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e82e030556e42e823e174e0c3bd97988d1a09d1f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/inject_fault.c | 79 ++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 54f656271266..81a02a8762b0 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -1,5 +1,5 @@
 /*
- * Fault injection for 64bit guests.
+ * Fault injection for both 32 and 64bit guests.
  *
  * Copyright (C) 2012,2013 - ARM Ltd
  * Author: Marc Zyngier <marc.zyngier@arm.com>
@@ -29,6 +29,74 @@
 				 PSR_I_BIT | PSR_D_BIT)
 #define EL1_EXCEPT_SYNC_OFFSET	0x200
 
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+	unsigned long cpsr;
+	unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+	bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
+	u32 return_offset = (is_thumb) ? 4 : 0;
+	u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+
+	cpsr = mode | COMPAT_PSR_I_BIT;
+
+	if (sctlr & (1 << 30))
+		cpsr |= COMPAT_PSR_T_BIT;
+	if (sctlr & (1 << 25))
+		cpsr |= COMPAT_PSR_E_BIT;
+
+	*vcpu_cpsr(vcpu) = cpsr;
+
+	/* Note: These now point to the banked copies */
+	*vcpu_spsr(vcpu) = new_spsr_value;
+	*vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+	/* Branch to exception vector */
+	if (sctlr & (1 << 13))
+		vect_offset += 0xffff0000;
+	else /* always have security exceptions */
+		vect_offset += vcpu_cp15(vcpu, c12_VBAR);
+
+	*vcpu_pc(vcpu) = vect_offset;
+}
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after TakeDataAbortException() and TakePrefetchAbortException
+ * pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+			 unsigned long addr)
+{
+	u32 vect_offset;
+	u32 *far, *fsr;
+	bool is_lpae;
+
+	if (is_pabt) {
+		vect_offset = 12;
+		far = &vcpu_cp15(vcpu, c6_IFAR);
+		fsr = &vcpu_cp15(vcpu, c5_IFSR);
+	} else { /* !iabt */
+		vect_offset = 16;
+		far = &vcpu_cp15(vcpu, c6_DFAR);
+		fsr = &vcpu_cp15(vcpu, c5_DFSR);
+	}
+
+	prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset);
+
+	*far = addr;
+
+	/* Give the guest an IMPLEMENTATION DEFINED exception */
+	is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
+	if (is_lpae)
+		*fsr = 1 << 9 | 0x34;
+	else
+		*fsr = 0x14;
+}
+
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
@@ -98,6 +166,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, false, addr);
+
 	inject_abt64(vcpu, false, addr);
 }
 
@@ -111,6 +182,9 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
  */
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, true, addr);
+
 	inject_abt64(vcpu, true, addr);
 }
 
@@ -122,5 +196,8 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
  */
 void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_undef32(vcpu);
+
 	inject_undef64(vcpu);
 }

From 80e531580f7bbb453119afd17bc752d9763676b5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 7 Feb 2013 10:46:46 +0000
Subject: [PATCH 033/287] arm64: KVM: enable initialization of a 32bit vcpu

Wire the init of a 32bit vcpu by allowing 32bit modes in pstate,
and providing sensible defaults out of reset state.

This feature is of course conditioned by the presence of 32bit
capability on the physical CPU, and is checked by the KVM_CAP_ARM_EL1_32BIT
capability.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0d854a60b1d7d39a37b25dd28f63cfa0df637b91)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  2 +-
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 arch/arm64/kvm/guest.c            |  6 ++++++
 arch/arm64/kvm/reset.c            | 26 +++++++++++++++++++++++++-
 include/uapi/linux/kvm.h          |  1 +
 5 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3f5830b3ca3f..644d73956864 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
-#define KVM_VCPU_MAX_FEATURES 1
+#define KVM_VCPU_MAX_FEATURES 2
 
 /* We don't currently support large pages. */
 #define KVM_HPAGE_GFN_SHIFT(x)	0
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5b1110c49df5..5031f4263937 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -75,6 +75,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
 
 struct kvm_vcpu_init {
 	__u32 target;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3d7518a7ebaa..2c3ff67a8ecb 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -99,6 +99,12 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
 		u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
 		switch (mode) {
+		case COMPAT_PSR_MODE_USR:
+		case COMPAT_PSR_MODE_FIQ:
+		case COMPAT_PSR_MODE_IRQ:
+		case COMPAT_PSR_MODE_SVC:
+		case COMPAT_PSR_MODE_ABT:
+		case COMPAT_PSR_MODE_UND:
 		case PSR_MODE_EL0t:
 		case PSR_MODE_EL1t:
 		case PSR_MODE_EL1h:
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 766150ac76ed..70a7816535cd 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -38,16 +38,32 @@ static const struct kvm_regs default_regs_reset = {
 			PSR_F_BIT | PSR_D_BIT),
 };
 
+static const struct kvm_regs default_regs_reset32 = {
+	.regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT |
+			COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
+};
+
 static const struct kvm_irq_level default_vtimer_irq = {
 	.irq	= 27,
 	.level	= 1,
 };
 
+static bool cpu_has_32bit_el1(void)
+{
+	u64 pfr0;
+
+	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	return !!(pfr0 & 0x20);
+}
+
 int kvm_arch_dev_ioctl_check_extension(long ext)
 {
 	int r;
 
 	switch (ext) {
+	case KVM_CAP_ARM_EL1_32BIT:
+		r = cpu_has_32bit_el1();
+		break;
 	default:
 		r = 0;
 	}
@@ -70,7 +86,15 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 
 	switch (vcpu->arch.target) {
 	default:
-		cpu_reset = &default_regs_reset;
+		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+			if (!cpu_has_32bit_el1())
+				return -EINVAL;
+			cpu_reset = &default_regs_reset32;
+			vcpu->arch.hcr_el2 &= ~HCR_RW;
+		} else {
+			cpu_reset = &default_regs_reset;
+		}
+
 		cpu_vtimer_irq = &default_vtimer_irq;
 		break;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 97277d333e82..acccd08be6c7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQ_MPIC 90
 #define KVM_CAP_PPC_RTAS 91
 #define KVM_CAP_IRQ_XICS 92
+#define KVM_CAP_ARM_EL1_32BIT 93
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From d3845bf4d5f6b104ec346dabf58817aa9d66d740 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 Apr 2013 17:46:31 +0100
Subject: [PATCH 034/287] arm64: KVM: userspace API documentation

Unsurprisingly, the arm64 userspace API is extremely similar to
the 32bit one, the only significant difference being the ONE_REG
register mapping.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 379e04c79e8a9ded8a202f1e266f0c5830185bea)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 58 +++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 5f91eda91647..9bfadeb8be31 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -280,7 +280,7 @@ kvm_run' (see below).
 4.11 KVM_GET_REGS
 
 Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_regs (out)
 Returns: 0 on success, -1 on error
@@ -301,7 +301,7 @@ struct kvm_regs {
 4.12 KVM_SET_REGS
 
 Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_regs (in)
 Returns: 0 on success, -1 on error
@@ -587,7 +587,7 @@ struct kvm_fpu {
 4.24 KVM_CREATE_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM
+Architectures: x86, ia64, ARM, arm64
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -595,14 +595,14 @@ Returns: 0 on success, -1 on error
 Creates an interrupt controller model in the kernel.  On x86, creates a virtual
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM, a GIC is
+only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
 created.
 
 
 4.25 KVM_IRQ_LINE
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm
+Architectures: x86, ia64, arm, arm64
 Type: vm ioctl
 Parameters: struct kvm_irq_level
 Returns: 0 on success, -1 on error
@@ -612,9 +612,10 @@ On some architectures it is required that an interrupt controller model has
 been previously created with KVM_CREATE_IRQCHIP.  Note that edge-triggered
 interrupts require the level to be set to 1 and then back to 0.
 
-ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
-(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
-specific cpus.  The irq field is interpreted like this:
+ARM/arm64 can signal an interrupt either at the CPU level, or at the
+in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
+use PPIs designated for specific cpus.  The irq field is interpreted
+like this:
 
   bits:  | 31 ... 24 | 23  ... 16 | 15    ...    0 |
   field: | irq_type  | vcpu_index |     irq_id     |
@@ -1831,6 +1832,22 @@ ARM 32-bit VFP control registers have the following id bit patterns:
 ARM 64-bit FP registers have the following id bit patterns:
   0x4030 0000 0012 0 <regno:12>
 
+
+arm64 registers are mapped using the lower 32 bits. The upper 16 of
+that is the register group type, or coprocessor number:
+
+arm64 core/FP-SIMD registers have the following id bit patterns. Note
+that the size of the access is variable, as the kvm_regs structure
+contains elements ranging from 32 to 128 bits. The index is a 32bit
+value in the kvm_regs structure seen as a 32bit array.
+  0x60x0 0000 0010 <index into the kvm_regs struct:16>
+
+arm64 CCSIDR registers are demultiplexed by CSSELR value:
+  0x6020 0000 0011 00 <csselr:8>
+
+arm64 system registers have the following id bit patterns:
+  0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
 4.69 KVM_GET_ONE_REG
 
 Capability: KVM_CAP_ONE_REG
@@ -2264,7 +2281,7 @@ current state.  "addr" is ignored.
 4.77 KVM_ARM_VCPU_INIT
 
 Capability: basic
-Architectures: arm
+Architectures: arm, arm64
 Type: vcpu ioctl
 Parameters: struct struct kvm_vcpu_init (in)
 Returns: 0 on success; -1 on error
@@ -2283,12 +2300,14 @@ should be created before this ioctl is invoked.
 Possible features:
 	- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
 	  Depends on KVM_CAP_ARM_PSCI.
+	- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
+	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 
 
 4.78 KVM_GET_REG_LIST
 
 Capability: basic
-Architectures: arm
+Architectures: arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_reg_list (in/out)
 Returns: 0 on success; -1 on error
@@ -2308,7 +2327,7 @@ KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 4.80 KVM_ARM_SET_DEVICE_ADDR
 
 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-Architectures: arm
+Architectures: arm, arm64
 Type: vm ioctl
 Parameters: struct kvm_arm_device_address (in)
 Returns: 0 on success, -1 on error
@@ -2329,18 +2348,19 @@ can access emulated or directly exposed devices, which the host kernel needs
 to know about. The id field is an architecture specific identifier for a
 specific device.
 
-ARM divides the id field into two parts, a device id and an address type id
-specific to the individual device.
+ARM/arm64 divides the id field into two parts, a device id and an
+address type id specific to the individual device.
 
   bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
   field: |        0x00000000      |     device id   |  addr type id  |
 
-ARM currently only require this when using the in-kernel GIC support for the
-hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id.  When
-setting the base address for the guest's mapping of the VGIC virtual CPU
-and distributor interface, the ioctl must be called after calling
-KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
-this ioctl twice for any of the base addresses will return -EEXIST.
+ARM/arm64 currently only require this when using the in-kernel GIC
+support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
+as the device id.  When setting the base address for the guest's
+mapping of the VGIC virtual CPU and distributor interface, the ioctl
+must be called after calling KVM_CREATE_IRQCHIP, but before calling
+KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
+base addresses will return -EEXIST.
 
 4.82 KVM_PPC_RTAS_DEFINE_TOKEN
 

From 1a12f6e7bcaaf0ec1a6a01fe5fb9341eb2f74ffe Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 2 Apr 2013 17:49:40 +0100
Subject: [PATCH 035/287] arm64: KVM: MAINTAINERS update

Elect myself as the KVM/arm64 maintainer.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6394a3ec02ab39147aab9ea56d0dabafd3dcae60)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 48c748080c96..823010fce98a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4719,6 +4719,15 @@ F:	arch/arm/include/uapi/asm/kvm*
 F:	arch/arm/include/asm/kvm*
 F:	arch/arm/kvm/
 
+KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
+M:	Marc Zyngier <marc.zyngier@arm.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	kvmarm@lists.cs.columbia.edu
+S:	Maintained
+F:	arch/arm64/include/uapi/asm/kvm*
+F:	arch/arm64/include/asm/kvm*
+F:	arch/arm64/kvm/
+
 KEXEC
 M:	Eric Biederman <ebiederm@xmission.com>
 W:	http://kernel.org/pub/linux/utils/kernel/kexec/

From 092f9fbf3845c1c6e0bb14f3eae1be583c071a7d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 2 May 2013 14:31:03 +0100
Subject: [PATCH 036/287] arm64: KVM: document kernel object mappings in HYP

HYP mode has access to some of the kernel pages. Document the
memory mapping and the offset between kernel VA and HYP VA.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit aa4a73a0a23a65a2f531d01f1865d1e61c6acb55)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/arm64/memory.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index c6941f815f15..d50fa618371b 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -102,3 +102,10 @@ Translation table lookup with 64KB pages:
  |                 |    +--------------------------> [41:29] L2 index (only 38:29 used)
  |                 +-------------------------------> [47:42] L1 index (not used)
  +-------------------------------------------------> [63] TTBR0/1
+
+When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
+offset from the kernel VA (top 24bits of the kernel VA set to zero):
+
+Start			End			Size		Use
+-----------------------------------------------------------------------
+0000004000000000	0000007fffffffff	 256GB		kernel objects mapped in HYP

From 146844e0e0dc7bcb2a080a2fdfd792674bff105b Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 30 Apr 2013 12:02:15 +0530
Subject: [PATCH 037/287] ARM: KVM: Allow host virt timer irq to be different
 from guest timer virt irq

The arch_timer irq numbers (or PPI numbers) are implementation dependent,
so the host virtual timer irq number can be different from guest virtual
timer irq number.

This patch ensures that host virtual timer irq number is read from DTB and
guest virtual timer irq is determined based on vcpu target type.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 5ae7f87a56fab10b8f9b135a8377c144397293ca)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c         | 12 ++++++++++++
 include/kvm/arm_arch_timer.h |  4 ++++
 virt/kvm/arm/arch_timer.c    | 29 ++++++++++++++++++++---------
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b80256b554cd..b7840e7aa452 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -27,6 +27,8 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_coproc.h>
 
+#include <kvm/arm_arch_timer.h>
+
 /******************************************************************************
  * Cortex-A15 Reset Values
  */
@@ -37,6 +39,11 @@ static struct kvm_regs a15_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
+static const struct kvm_irq_level a15_vtimer_irq = {
+	.irq = 27,
+	.level = 1,
+};
+
 
 /*******************************************************************************
  * Exported reset function
@@ -52,6 +59,7 @@ static struct kvm_regs a15_regs_reset = {
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_regs *cpu_reset;
+	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
 	case KVM_ARM_TARGET_CORTEX_A15:
@@ -59,6 +67,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 			return -EINVAL;
 		cpu_reset = &a15_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
+		cpu_vtimer_irq = &a15_vtimer_irq;
 		break;
 	default:
 		return -ENODEV;
@@ -70,5 +79,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	/* Reset CP15 registers */
 	kvm_reset_coprocs(vcpu);
 
+	/* Reset arch_timer context */
+	kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
 	return 0;
 }
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 68cb9e1dfb81..6d9aeddc09bf 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -61,6 +61,8 @@ struct arch_timer_cpu {
 #ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
 int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+			  const struct kvm_irq_level *irq);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
@@ -76,6 +78,8 @@ static inline int kvm_timer_init(struct kvm *kvm)
 	return 0;
 }
 
+static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+					const struct kvm_irq_level *irq) {}
 static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 6f485eaf643b..c2e1ef4604e8 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -30,9 +30,7 @@
 
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
-static struct kvm_irq_level timer_irq = {
-	.level	= 1,
-};
+static unsigned int host_vtimer_irq;
 
 static cycle_t kvm_phys_timer_read(void)
 {
@@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 
 	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
 	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    vcpu->arch.timer_cpu.irq->irq,
-			    vcpu->arch.timer_cpu.irq->level);
+			    timer->irq->irq,
+			    timer->irq->level);
 }
 
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 	timer_arm(timer, ns);
 }
 
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+			  const struct kvm_irq_level *irq)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	/*
+	 * The vcpu timer irq number cannot be determined in
+	 * kvm_timer_vcpu_init() because it is called much before
+	 * kvm_vcpu_set_target(). To handle this, we determine
+	 * vcpu timer irq number when the vcpu is reset.
+	 */
+	timer->irq = irq;
+}
+
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -163,12 +175,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 	INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
 	hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	timer->timer.function = kvm_timer_expire;
-	timer->irq = &timer_irq;
 }
 
 static void kvm_timer_init_interrupt(void *info)
 {
-	enable_percpu_irq(timer_irq.irq, 0);
+	enable_percpu_irq(host_vtimer_irq, 0);
 }
 
 
@@ -182,7 +193,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self,
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		disable_percpu_irq(timer_irq.irq);
+		disable_percpu_irq(host_vtimer_irq);
 		break;
 	}
 
@@ -230,7 +241,7 @@ int kvm_timer_hyp_init(void)
 		goto out;
 	}
 
-	timer_irq.irq = ppi;
+	host_vtimer_irq = ppi;
 
 	err = register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {

From 2da084abe92d8aa40c8972c461114528c336ab54 Mon Sep 17 00:00:00 2001
From: Dave P Martin <Dave.Martin@arm.com>
Date: Wed, 1 May 2013 17:49:28 +0100
Subject: [PATCH 038/287] ARM: KVM: Don't handle PSCI calls via SMC

Currently, kvmtool unconditionally declares that HVC should be used
to call PSCI, so the function numbers in the DT tell the guest
nothing about the function ID namespace or calling convention for
SMC.

We already assume that the guest will examine and honour the DT,
since there is no way it could possibly guess the KVM-specific PSCI
function IDs otherwise.  So let's not encourage guests to violate
what's specified in the DT by using SMC to make the call.

[ Modified to apply to top of kvm/arm tree - Christoffer ]

Signed-off-by: Dave P Martin <Dave.Martin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 24a7f675752e06729589d40a5256970998a21502)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/handle_exit.c | 3 ---
 arch/arm/kvm/psci.c        | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3d74a0be47db..df4c82d47ad7 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -52,9 +52,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 7ee5bb7a3667..86a693a02ba3 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -75,7 +75,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  * kvm_psci_call - handle PSCI call if r0 value is in range
  * @vcpu: Pointer to the VCPU struct
  *
- * Handle PSCI calls from guests through traps from HVC or SMC instructions.
+ * Handle PSCI calls from guests through traps from HVC instructions.
  * The calling convention is similar to SMC calls to the secure world where
  * the function number is placed in r0 and this function returns true if the
  * function number specified in r0 is withing the PSCI range, and false

From fd741ad3dd186a0df45994059de3a08517a856bc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:35 +0100
Subject: [PATCH 039/287] ARM: KVM: remove dead prototype for
 __kvm_tlb_flush_vmid

__kvm_tlb_flush_vmid has been renamed to __kvm_tlb_flush_vmid_ipa,
and the old prototype should have been removed when the code was
modified.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 368074d908b785588778f00b4384376cd636f4a1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 4bb08e3e52bc..a2f43ddcc300 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -74,8 +74,6 @@ extern char __kvm_hyp_vector[];
 extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];
 
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 

From 3be39e7f5aaca996a006717658c517650f345476 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:37 +0100
Subject: [PATCH 040/287] ARM: KVM: use phys_addr_t instead of unsigned long
 long for HYP PGDs

HYP PGDs are passed around as phys_addr_t, except just before calling
into the hypervisor init code, where they are cast to a rather weird
unsigned long long.

Just keep them around as phys_addr_t, which is what makes the most
sense.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit dac288f7b38a7439502b77dabcdf8a9a5c4ae721)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h | 4 ++--
 arch/arm/kvm/arm.c              | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ff5aaf10e6ec..1f3cee2e210e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -190,8 +190,8 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);
 
-static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
-				       unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+				       phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ef1703b9587b..741f66a2edbd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -800,8 +800,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 static void cpu_init_hyp_mode(void *dummy)
 {
-	unsigned long long boot_pgd_ptr;
-	unsigned long long pgd_ptr;
+	phys_addr_t boot_pgd_ptr;
+	phys_addr_t pgd_ptr;
 	unsigned long hyp_stack_ptr;
 	unsigned long stack_page;
 	unsigned long vector_ptr;
@@ -809,8 +809,8 @@ static void cpu_init_hyp_mode(void *dummy)
 	/* Switch from the HYP stub to our own HYP init vector */
 	__hyp_set_vectors(kvm_get_idmap_vector());
 
-	boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
-	pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
+	boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
 	vector_ptr = (unsigned long)__kvm_hyp_vector;

From a7232858be9e29371ef8ed2a39658443344b7765 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:38 +0100
Subject: [PATCH 041/287] ARM: KVM: don't special case PC when doing an MMIO

Admitedly, reading a MMIO register to load PC is very weird.
Writing PC to a MMIO register is probably even worse. But
the architecture doesn't forbid any of these, and injecting
a Prefetch Abort is the wrong thing to do anyway.

Remove this check altogether, and let the adventurous guest
wander into LaLaLand if they feel compelled to do so.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 8734f16fb2aa4ff0bb57ad6532661a38bc8ff957)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h | 5 -----
 arch/arm/kvm/mmio.c                | 6 ------
 2 files changed, 11 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 82b4babead2c..a464e8d7b6c5 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
 	return cpsr_mode > USR_MODE;;
 }
 
-static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
-{
-	return reg == 15;
-}
-
 static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault.hsr;
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 72a12f2171b2..b8e06b7a2833 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -86,12 +86,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	sign_extend = kvm_vcpu_dabt_issext(vcpu);
 	rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-	if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
-		/* IO memory trying to read/write pc */
-		kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
-		return 1;
-	}
-
 	mmio->is_write = is_write;
 	mmio->phys_addr = fault_ipa;
 	mmio->len = len;

From 99f2cf12576cffa1a0168b0cc05216de54ca12f4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 May 2013 12:11:39 +0100
Subject: [PATCH 042/287] ARM: KVM: get rid of S2_PGD_SIZE

S2_PGD_SIZE defines the number of pages used by a stage-2 PGD
and is unused, except for a VM_BUG_ON check that missuses the
define.

As the check is very unlikely to ever triggered except in
circumstances where KVM is the least of our worries, just kill
both the define and the VM_BUG_ON check.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@cs.columbia.edu>
(cherry picked from commit 4db845c3d8e2f8a219e8ac48834dd4fe085e5d63)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h | 1 -
 arch/arm/kvm/mmu.c             | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 124623e5ef14..64e96960de29 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -135,7 +135,6 @@
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1ULL)
 #define PTRS_PER_S2_PGD	(1ULL << (KVM_PHYS_SHIFT - 30))
 #define S2_PGD_ORDER	get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-#define S2_PGD_SIZE	(1 << S2_PGD_ORDER)
 
 /* Virtualization Translation Control Register (VTCR) bits */
 #define VTCR_SH0	(3 << 12)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 84ba67b982c0..ca6bea4859b4 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -382,9 +382,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
 	if (!pgd)
 		return -ENOMEM;
 
-	/* stage-2 pgd must be aligned to its size */
-	VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
-
 	memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;

From d144c1d2cd86381c80187f5ccee5eb27dad6b21b Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org>
Date: Thu, 6 Jun 2013 18:02:54 -0700
Subject: [PATCH 043/287] arm/kvm: Cleanup KVM_ARM_MAX_VCPUS logic

Commit d21a1c83c7595e387545632e44cd7797b76e19cc (ARM: KVM: define KVM_ARM_MAX_VCPUS
unconditionally) changed the Kconfig logic for KVM_ARM_MAX_VCPUS to work around a
build error arising from the use of KVM_ARM_MAX_VCPUS when CONFIG_KVM=n.  The
resulting Kconfig logic is a bit awkward and leaves a KVM_ARM_MAX_VCPUS always
defined in the kernel config file.

This change reverts the Kconfig logic back and adds a simple preprocessor
conditional in kvm_host.h to handle when CONFIG_KVM_ARM_MAX_VCPUS is undefined.

Signed-off-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit f2dda9d829818b055510187059cdfa4ece10c82d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h | 5 +++++
 arch/arm/kvm/Kconfig            | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 1f3cee2e210e..7d22517d8071 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -25,7 +25,12 @@
 #include <asm/fpstate.h>
 #include <kvm/arm_arch_timer.h>
 
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 370e1a8af6ac..49dd64e579c2 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -41,9 +41,9 @@ config KVM_ARM_HOST
 	  Provides host support for ARM processors.
 
 config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
-	default 4 if KVM_ARM_HOST
-	default 0
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
 	help
 	  Static number of max supported virtual CPUs per VM.
 

From 8847470177f0d453fe41bc8de72b89a41a264830 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 21 Jun 2013 22:33:22 +0200
Subject: [PATCH 044/287] ARM: kvm: don't include drivers/virtio/Kconfig

The virtio configuration has recently moved and is now visible everywhere.
Including the file again from KVM as we used to need earlier now causes
dependency problems:

warning: (CAIF_VIRTIO && VIRTIO_PCI && VIRTIO_MMIO && REMOTEPROC && RPMSG)
selects VIRTIO which has unmet direct dependencies (VIRTUALIZATION)

Cc: Christoffer Dall <cdall@cs.columbia.edu>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 8bd4ffd6b3a98f00267051dc095076ea2ff06ea8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 49dd64e579c2..ebf5015508b5 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -67,6 +67,4 @@ config KVM_ARM_TIMER
 	---help---
 	  Adds support for the Architected Timers in virtual machines
 
-source drivers/virtio/Kconfig
-
 endif # VIRTUALIZATION

From 54993349e584212be7d554a2188f6fef3efeb701 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 4 Jul 2013 13:34:32 +0100
Subject: [PATCH 045/287] arm64: KVM: Kconfig integration

Finally plug KVM/arm64 into the config system, making it possible
to enable KVM support on AArch64 CPUs.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c3eb5b14449a0949e9764d39374a2ea63faae14f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/Kconfig              |  2 ++
 arch/arm64/kernel/asm-offsets.c |  1 +
 arch/arm64/kvm/Kconfig          | 51 +++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 arch/arm64/kvm/Kconfig

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 43068cf44c2d..de9eeb43f622 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -275,6 +275,8 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
+source "arch/arm64/kvm/Kconfig"
+
 source "arch/arm64/Kconfig.debug"
 
 source "security/Kconfig"
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 49c162c03b69..666e231d410b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
+#include <linux/kvm_host.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/cputable.h>
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
new file mode 100644
index 000000000000..21e90820bd23
--- /dev/null
+++ b/arch/arm64/kvm/Kconfig
@@ -0,0 +1,51 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	---help---
+	  Say Y here to get to see options for using your Linux host to run
+	  other operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	bool "Kernel-based Virtual Machine (KVM) support"
+	select MMU_NOTIFIER
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select KVM_MMIO
+	select KVM_ARM_HOST
+	select KVM_ARM_VGIC
+	select KVM_ARM_TIMER
+	---help---
+	  Support hosting virtualized guest machines.
+
+	  If unsure, say N.
+
+config KVM_ARM_HOST
+	bool
+	---help---
+	  Provides host support for ARM processors.
+
+config KVM_ARM_VGIC
+	bool
+	depends on KVM_ARM_HOST && OF
+	select HAVE_KVM_IRQCHIP
+	---help---
+	  Adds support for a hardware assisted, in-kernel GIC emulation.
+
+config KVM_ARM_TIMER
+	bool
+	depends on KVM_ARM_VGIC
+	---help---
+	  Adds support for the Architected Timers in virtual machines.
+
+endif # VIRTUALIZATION

From a043deb0317044bdeda876d2170de0b24c2bfba8 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Date: Thu, 4 Jul 2013 13:40:29 +0900
Subject: [PATCH 046/287] KVM: Introduce kvm_arch_memslots_updated()

This is called right after the memslots is updated, i.e. when the result
of update_memslots() gets installed in install_new_memslots().  Since
the memslots needs to be updated twice when we delete or move a memslot,
kvm_arch_commit_memory_region() does not correspond to this exactly.

In the following patch, x86 will use this new API to check if the mmio
generation has reached its maximum value, in which case mmio sptes need
to be flushed out.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Acked-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e59dbe09f8e6fb8f6ee19dc79d1a2f14299e4cd2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c         | 4 ++++
 arch/ia64/kvm/kvm-ia64.c   | 4 ++++
 arch/mips/kvm/kvm_mips.c   | 4 ++++
 arch/powerpc/kvm/powerpc.c | 4 ++++
 arch/s390/kvm/kvm-s390.c   | 4 ++++
 arch/x86/kvm/x86.c         | 4 ++++
 include/linux/kvm_host.h   | 1 +
 virt/kvm/kvm_main.c        | 5 ++++-
 8 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 741f66a2edbd..9c697db2787e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -219,6 +219,10 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5b2dc0d10c8f..bdfd8789b376 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1560,6 +1560,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
 		struct kvm_userspace_memory_region *mem,
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index dd203e59e6fd..a7b044536de4 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -208,6 +208,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                 struct kvm_memory_slot *memslot,
                                 struct kvm_userspace_memory_region *mem,
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6316ee336e88..ae63ae4a1a5f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -420,6 +420,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return kvmppc_core_create_memslot(slot, npages);
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 698fb826e149..e515b2d4a947 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -981,6 +981,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return 0;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8ba99c34180..894003d79d22 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6976,6 +6976,10 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 	return -ENOMEM;
 }
 
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8db53cfaccdb..15018c572ac5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -497,6 +497,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b547a1ceecbc..7ca9939ee9a7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -731,7 +731,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	update_memslots(slots, new, kvm->memslots->generation);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
-	return old_memslots; 
+
+	kvm_arch_memslots_updated(kvm);
+
+	return old_memslots;
 }
 
 /*

From 3b180da00d212c6968ab821dcf85f3776929a44d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 6 Aug 2013 13:50:54 -0700
Subject: [PATCH 047/287] ARM: KVM: Fix unaligned unmap_range leak

The unmap_range function did not properly cover the case when the start
address was not aligned to PMD_SIZE or PUD_SIZE and an entire pte table
or pmd table was cleared, causing us to leak memory when incrementing
the addr.

The fix is to always move onto the next page table entry boundary
instead of adding the full size of the VA range covered by the
corresponding table level entry.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit d3840b26614d8ce3db53c98061d9fcb1b9ccb0dd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ca6bea4859b4..80a83ec4a9ae 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -132,37 +132,37 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long long addr = start, end = start + size;
-	u64 range;
+	u64 next;
 
 	while (addr < end) {
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
 		if (pud_none(*pud)) {
-			addr += PUD_SIZE;
+			addr = pud_addr_end(addr, end);
 			continue;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
-			addr += PMD_SIZE;
+			addr = pmd_addr_end(addr, end);
 			continue;
 		}
 
 		pte = pte_offset_kernel(pmd, addr);
 		clear_pte_entry(kvm, pte, addr);
-		range = PAGE_SIZE;
+		next = addr + PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
 		if (pte_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
-			range = PMD_SIZE;
+			next = pmd_addr_end(addr, end);
 			if (pmd_empty(pmd)) {
 				clear_pud_entry(kvm, pud, addr);
-				range = PUD_SIZE;
+				next = pud_addr_end(addr, end);
 			}
 		}
 
-		addr += range;
+		addr = next;
 	}
 }
 

From ce7fc7403b536adb6b0871f3e4d07a4da08ad63b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 6 Aug 2013 13:05:48 +0100
Subject: [PATCH 048/287] arm64: KVM: fix 2-level page tables unmapping

When using 64kB pages, we only have two levels of page tables,
meaning that PGD, PUD and PMD are fused. In this case, trying
to refcount PUDs and PMDs independently is a a complete disaster,
as they are the same.

We manage to get it right for the allocation (stage2_set_pte uses
{pmd,pud}_none), but the unmapping path clears both pud and pmd
refcounts, which fails spectacularly with 2-level page tables.

The fix is to avoid calling clear_pud_entry when both the pmd and
pud pages are empty. For this, and instead of introducing another
pud_empty function, consolidate both pte_empty and pmd_empty into
page_empty (the code is actually identical) and use that to also
test the validity of the pud.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 979acd5e18c3e5cb7e3308c699d79553af5af8c6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 80a83ec4a9ae..0988d9e04dd4 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -85,6 +85,12 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
+static bool page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table = pmd_offset(pud, 0);
@@ -103,12 +109,6 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 	put_page(virt_to_page(pmd));
 }
 
-static bool pmd_empty(pmd_t *pmd)
-{
-	struct page *pmd_page = virt_to_page(pmd);
-	return page_count(pmd_page) == 1;
-}
-
 static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 {
 	if (pte_present(*pte)) {
@@ -118,12 +118,6 @@ static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 	}
 }
 
-static bool pte_empty(pte_t *pte)
-{
-	struct page *pte_page = virt_to_page(pte);
-	return page_count(pte_page) == 1;
-}
-
 static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			unsigned long long start, u64 size)
 {
@@ -153,10 +147,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		next = addr + PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
-		if (pte_empty(pte)) {
+		if (page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
 			next = pmd_addr_end(addr, end);
-			if (pmd_empty(pmd)) {
+			if (page_empty(pmd) && !page_empty(pud)) {
 				clear_pud_entry(kvm, pud, addr);
 				next = pud_addr_end(addr, end);
 			}

From ebd362b7533b7866928c9bca7727c48558aedbd2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 7 Jun 2013 11:02:34 +0100
Subject: [PATCH 049/287] arm64: KVM: perform save/restore of PAR_EL1

Not saving PAR_EL1 is an unfortunate oversight. If the guest
performs an AT* operation and gets scheduled out before reading
the result of the translation from PAREL1, it could become
corrupted by another guest or the host.

Saving this register is made slightly more complicated as KVM also
uses it on the permission fault handling path, leading to an ugly
"stash and restore" sequence. Fortunately, this is already a slow
path so we don't really care. Also, Linux doesn't do any AT*
operation, so Linux guests are not impacted by this bug.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1bbd80549810637b7381ab0649ba7c7d62f1342a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h | 17 ++++++++++-------
 arch/arm64/kvm/hyp.S             | 10 ++++++++++
 arch/arm64/kvm/sys_regs.c        |  3 +++
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index c92de4163eba..b25763bc0ec4 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,14 +42,15 @@
 #define	TPIDR_EL1	18	/* Thread ID, Privileged */
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+#define	PAR_EL1		21	/* Physical Address Register */
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	21	/* Domain Access Control Register */
-#define	IFSR32_EL2	22	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	23	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	24	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	25	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	26	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	27
+#define	DACR32_EL2	22	/* Domain Access Control Register */
+#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	28
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -69,6 +70,8 @@
 #define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
 #define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
 #define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
+#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
 #define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
 #define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
 #define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index ff985e3d8b72..218802f68b20 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -214,6 +214,7 @@ __kvm_hyp_code_start:
 	mrs	x21,	tpidr_el1
 	mrs	x22, 	amair_el1
 	mrs	x23, 	cntkctl_el1
+	mrs	x24,	par_el1
 
 	stp	x4, x5, [x3]
 	stp	x6, x7, [x3, #16]
@@ -225,6 +226,7 @@ __kvm_hyp_code_start:
 	stp	x18, x19, [x3, #112]
 	stp	x20, x21, [x3, #128]
 	stp	x22, x23, [x3, #144]
+	str	x24, [x3, #160]
 .endm
 
 .macro restore_sysregs
@@ -243,6 +245,7 @@ __kvm_hyp_code_start:
 	ldp	x18, x19, [x3, #112]
 	ldp	x20, x21, [x3, #128]
 	ldp	x22, x23, [x3, #144]
+	ldr	x24, [x3, #160]
 
 	msr	vmpidr_el2,	x4
 	msr	csselr_el1,	x5
@@ -264,6 +267,7 @@ __kvm_hyp_code_start:
 	msr	tpidr_el1,	x21
 	msr	amair_el1,	x22
 	msr	cntkctl_el1,	x23
+	msr	par_el1,	x24
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -753,6 +757,10 @@ el1_trap:
 	 */
 	tbnz	x1, #7, 1f	// S1PTW is set
 
+	/* Preserve PAR_EL1 */
+	mrs	x3, par_el1
+	push	x3, xzr
+
 	/*
 	 * Permission fault, HPFAR_EL2 is invalid.
 	 * Resolve the IPA the hard way using the guest VA.
@@ -766,6 +774,8 @@ el1_trap:
 
 	/* Read result */
 	mrs	x3, par_el1
+	pop	x0, xzr			// Restore PAR_EL1 from the stack
+	msr	par_el1, x0
 	tbnz	x3, #0, 3f		// Bail out if we failed the translation
 	ubfx	x3, x3, #12, #36	// Extract IPA
 	lsl	x3, x3, #4		// and present it like HPFAR
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 94923609753b..02e9d09e1d80 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -211,6 +211,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	/* FAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
 	  NULL, reset_unknown, FAR_EL1 },
+	/* PAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+	  NULL, reset_unknown, PAR_EL1 },
 
 	/* PMINTENSET_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),

From e336bcc2613d7332083819ac8f148313e9471f59 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 11 Jun 2013 18:05:25 +0100
Subject: [PATCH 050/287] arm64: KVM: add missing dsb before invalidating
 Stage-2 TLBs

When performing a Stage-2 TLB invalidation, it is necessary to
make sure the write to the page tables is observable by all CPUs.

For this purpose, add dsb instructions to __kvm_tlb_flush_vmid_ipa
and __kvm_flush_vm_context before doing the TLB invalidation itself.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f142e5eeb724cfbedd203b32b3b542d78dbe2545)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 218802f68b20..1ac0bbbdddb2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -604,6 +604,8 @@ END(__kvm_vcpu_run)
 
 // void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 ENTRY(__kvm_tlb_flush_vmid_ipa)
+	dsb	ishst
+
 	kern_hyp_va	x0
 	ldr	x2, [x0, #KVM_VTTBR]
 	msr	vttbr_el2, x2
@@ -625,6 +627,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 ENTRY(__kvm_flush_vm_context)
+	dsb	ishst
 	tlbi	alle1is
 	ic	ialluis
 	dsb	sy

From fbcac5446f1c5be6e54e282e1dbed74c58671a72 Mon Sep 17 00:00:00 2001
From: Chen Gang <gang.chen@asianux.com>
Date: Mon, 22 Jul 2013 04:40:38 +0100
Subject: [PATCH 051/287] arm64: KVM: use 'int' instead of 'u32' for variable
 'target' in kvm_host.h.

'target' will be set to '-1' in kvm_arch_vcpu_init(), and it need check
'target' whether less than zero or not in kvm_vcpu_initialized().

So need define target as 'int' instead of 'u32', just like ARM has done.

The related warning:

  arch/arm64/kvm/../../../arch/arm/kvm/arm.c:497:2: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits]

Signed-off-by: Chen Gang <gang.chen@asianux.com>
[Marc: reformated the Subject line to fit the series]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6c8c0c4dc0e98ee2191211d66e9f876e95787073)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 644d73956864..0859a4ddd1e7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -129,7 +129,7 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
 	/* Target CPU and feature flags */
-	u32 target;
+	int target;
 	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
 	/* Detect first run of a vcpu */

From 54118be422ac3ff0af613676d47f8d46cc9f8801 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 29 Jul 2013 20:46:04 -0700
Subject: [PATCH 052/287] KVM: ARM: Squash len warning

The 'len' variable was declared an unsigned and then checked for less
than 0, which results in warnings on some compilers.  Since len is
assigned an int, make it an int.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2184a60de26b94bc5a88de3e5a960ef9ff54ba5a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index b8e06b7a2833..0c25d9487d53 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -63,7 +63,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		      struct kvm_exit_mmio *mmio)
 {
-	unsigned long rt, len;
+	unsigned long rt;
+	int len;
 	bool is_write, sign_extend;
 
 	if (kvm_vcpu_dabt_isextabt(vcpu)) {

From d829a739332e33ffc9753c607325f461e9c994d8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 13 May 2013 12:08:06 +0100
Subject: [PATCH 053/287] ARM: kvm: use inner-shareable barriers after TLB
 flushing

When flushing the TLB at PL2 in response to remapping at stage-2 or VMID
rollover, we have a dsb instruction to ensure completion of the command
before continuing.

Since we only care about other processors for TLB invalidation, use the
inner-shareable variant of the dsb instruction instead.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit e3ab547f57bd626201d4b715b696c80ad1ef4ba2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/init.S       | 2 +-
 arch/arm/kvm/interrupts.S | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index f048338135f7..1b9844d369cc 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -142,7 +142,7 @@ target:	@ We're now in the trampoline code, switch page tables
 
 	@ Invalidate the old TLBs
 	mcr	p15, 4, r0, c8, c7, 0	@ TLBIALLH
-	dsb
+	dsb	ish
 
 	eret
 
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 16cd4ba5d7fd..f85052facffc 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -55,7 +55,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
 	isb
 	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
-	dsb
+	dsb	ish
 	isb
 	mov	r2, #0
 	mov	r3, #0
@@ -79,7 +79,7 @@ ENTRY(__kvm_flush_vm_context)
 	mcr     p15, 4, r0, c8, c3, 4
 	/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
 	mcr     p15, 0, r0, c7, c1, 0
-	dsb
+	dsb	ish
 	isb				@ Not necessary if followed by eret
 
 	bx	lr

From 678b5999d8ded1bff60ef8e3bfb95c93d63c5ebc Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 6 Aug 2013 05:34:16 +0100
Subject: [PATCH 054/287] ARM: 7808/1: KVM: mm: Get rid of L_PTE_USER ref from
 PAGE_S2_DEVICE

THe L_PTE_USER actually has nothing to do with stage 2 mappings and the
L_PTE_S2_RDWR value sets the readable bit, which was what L_PTE_USER
was used for before proper handling of stage 2 memory defines.

Changelog:
  [v3]: Drop call to kvm_set_s2pte_writable in mmu.c
  [v2]: Change default mappings to be r/w instead of r/o, as per Marc
     Zyngier's suggestion.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
(cherry picked from commit 8947c09d05da9f0436f423518f449beaa5ea1bdc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/pgtable.h | 2 +-
 arch/arm/kvm/mmu.c             | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 9bcd262a9008..8afc60c55e82 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -97,7 +97,7 @@ extern pgprot_t		pgprot_s2_device;
 #define PAGE_HYP		_MOD_PROT(pgprot_kernel, L_PTE_HYP)
 #define PAGE_HYP_DEVICE		_MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
 #define PAGE_S2			_MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE		_MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
 
 #define __PAGE_NONE		__pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
 #define __PAGE_SHARED		__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 0988d9e04dd4..b0de86b56c13 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -489,7 +489,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 
 	for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
-		kvm_set_s2pte_writable(&pte);
 
 		ret = mmu_topup_memory_cache(&cache, 2, 2);
 		if (ret)

From effab3928a14e5574f32801eb665ce7c6a98ef14 Mon Sep 17 00:00:00 2001
From: Yann Droneaud <ydroneaud@opteya.com>
Date: Sat, 24 Aug 2013 22:14:07 +0200
Subject: [PATCH 055/287] kvm: use anon_inode_getfd() with O_CLOEXEC flag

KVM uses anon_inode_get() to allocate file descriptors as part
of some of its ioctls. But those ioctls are lacking a flag argument
allowing userspace to choose options for the newly opened file descriptor.

In such case it's advised to use O_CLOEXEC by default so that
userspace is allowed to choose, without race, if the file descriptor
is going to be inherited across exec().

This patch set O_CLOEXEC flag on all file descriptors created
with anon_inode_getfd() to not leak file descriptors across exec().

Signed-off-by: Yann Droneaud <ydroneaud@opteya.com>
Link: http://lkml.kernel.org/r/cover.1377372576.git.ydroneaud@opteya.com
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 24009b0549de563006705b9af8694fc8fc9a5aa1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7ca9939ee9a7..b5afee319073 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1896,7 +1896,7 @@ static struct file_operations kvm_vcpu_fops = {
  */
 static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 {
-	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
+	return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
 }
 
 /*
@@ -2305,7 +2305,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 		return ret;
 	}
 
-	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
 	if (ret < 0) {
 		ops->destroy(dev);
 		return ret;
@@ -2589,7 +2589,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 		return r;
 	}
 #endif
-	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+	r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
 	if (r < 0)
 		kvm_put_kvm(kvm);
 

From 4ace5f4542de34fb99068fa2285947d6bbfee26a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 25 Jul 2013 03:04:38 +0200
Subject: [PATCH 056/287] kvm: optimize away THP checks in kvm_is_mmio_pfn()

The checks on PG_reserved in the page structure on head and tail pages
aren't necessary because split_huge_page wouldn't transfer the
PG_reserved bit from head to tail anyway.

This was a forward-thinking check done in the case PageReserved was
set by a driver-owned page mapped in userland with something like
remap_pfn_range in a VM_PFNMAP region, but using hugepmds (not
possible right now). It was meant to be very safe, but it's overkill
as it's unlikely split_huge_page could ever run without the driver
noticing and tearing down the hugepage itself.

And if a driver in the future will really want to map a reserved
hugepage in userland using an huge pmd it should simply take care of
marking all subpages reserved too to keep KVM safe. This of course
would require such a hypothetical driver to tear down the huge pmd
itself and splitting the hugepage itself, instead of relaying on
split_huge_page, but that sounds very reasonable, especially
considering split_huge_page wouldn't currently transfer the reserved
bit anyway.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 11feeb498086a3a5907b8148bdf1786a9b18fc55)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b5afee319073..6c9130bd16c8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,28 +102,8 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-	if (pfn_valid(pfn)) {
-		int reserved;
-		struct page *tail = pfn_to_page(pfn);
-		struct page *head = compound_trans_head(tail);
-		reserved = PageReserved(head);
-		if (head != tail) {
-			/*
-			 * "head" is not a dangling pointer
-			 * (compound_trans_head takes care of that)
-			 * but the hugepage may have been splitted
-			 * from under us (and we may not hold a
-			 * reference count on the head page so it can
-			 * be reused before we run PageReferenced), so
-			 * we've to check PageTail before returning
-			 * what we just read.
-			 */
-			smp_rmb();
-			if (PageTail(tail))
-				return reserved;
-		}
-		return PageReserved(tail);
-	}
+	if (pfn_valid(pfn))
+		return PageReserved(pfn_to_page(pfn));
 
 	return true;
 }

From 1752d0c7e51f1a5e8d1f021d6b4eef1845c6da06 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 29 Aug 2013 11:08:22 +0100
Subject: [PATCH 057/287] ARM: KVM: vgic: simplify vgic_get_target_reg

vgic_get_target_reg is quite complicated, for no good reason.
Actually, it is fairly easy to write it in a much more efficient
way by using the target CPU array instead of the bitmap.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 986af8e0789a41ac4844e6eefed4a33e86524918)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 17c5ac7d10ed..a2d478aec046 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -432,19 +432,13 @@ static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
 static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
-	struct kvm_vcpu *vcpu;
-	int i, c;
-	unsigned long *bmap;
+	int i;
 	u32 val = 0;
 
 	irq -= VGIC_NR_PRIVATE_IRQS;
 
-	kvm_for_each_vcpu(c, vcpu, kvm) {
-		bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
-		for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
-			if (test_bit(irq + i, bmap))
-				val |= 1 << (c + i * 8);
-	}
+	for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
+		val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
 
 	return val;
 }

From 635c887f2931a200a52694170122e2b62d1b6218 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 29 Aug 2013 11:08:23 +0100
Subject: [PATCH 058/287] ARM: KVM: vgic: fix GICD_ICFGRn access

All the code in handle_mmio_cfg_reg() assumes the offset has
been shifted right to accomodate for the 2:1 bit compression,
but this is only done when getting the register address.

Shift the offset early so the code works mostly unchanged.

Reported-by: Zhaobo (Bob, ERC) <zhaobo@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 6545eae3d7a1b6dc2edb8ede9107998aee1207ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index a2d478aec046..902789ff4abb 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -541,8 +541,12 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 				struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
 	u32 val;
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				       vcpu->vcpu_id, offset >> 1);
+	u32 *reg;
+
+	offset >>= 1;
+	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+				  vcpu->vcpu_id, offset);
+
 	if (offset & 2)
 		val = *reg >> 16;
 	else

From a144ec826c81facd70fd157aef6f7b7030687f68 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 29 Aug 2013 11:08:24 +0100
Subject: [PATCH 059/287] ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs

For bytemaps each IRQ field is 1 byte wide, so we pack 4 irq fields in
one word and since there are 32 private (per cpu) irqs, we have 8
private u32 fields on the vgic_bytemap struct.  We shift the offset from
the base of the register group right by 2, giving us the word index
instead of the field index.  But then there are 8 private words, not 4,
which is also why we subtract 8 words from the offset of the shared
words.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 8d98915b6bda499e47d19166101d0bbcfd409c80)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 902789ff4abb..685fc72fc751 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -149,7 +149,7 @@ static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 {
 	offset >>= 2;
 	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 4)
+	if (offset < 8)
 		return x->percpu[cpuid] + offset;
 	else
 		return x->shared + offset - 8;

From de5324b84480246ad6064cd3469d362995c2e929 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 29 Aug 2013 11:08:25 +0100
Subject: [PATCH 060/287] ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256

The Versatile Express TC2 board, which we use as our main emulated
platform in QEMU, defines 160+32 == 192 interrupts, so limiting the
number of interrupts to 128 is not quite going to cut it for real board
emulation.

Note that this didn't use to be a problem because QEMU was buggy and
only defined 128 interrupts until recently.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 9b2d2e0df8a49414b1e5bc89148c9984dd87782a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 343744e4809c..7e2d15837b02 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -26,7 +26,7 @@
 #include <linux/types.h>
 #include <linux/irqchip/arm-gic.h>
 
-#define VGIC_NR_IRQS		128
+#define VGIC_NR_IRQS		256
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)

From 07557caabbdc70c2644e6f195fd07ca8be7d16b2 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 8 Aug 2013 20:35:07 -0700
Subject: [PATCH 061/287] ARM: KVM: Fix kvm_set_pte assignment

THe kvm_set_pte function was actually assigning the entire struct to the
structure member, which should work because the structure only has that
one member, but it is still not very nice.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0963e5d0f22f9d197dbf206d8b5b2a150722cf5e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac7091003..9b28c41f4ba9 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -64,7 +64,7 @@ void kvm_clear_hyp_idmap(void);
 
 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
-	pte_val(*pte) = new_pte;
+	*pte = new_pte;
 	/*
 	 * flush_pmd_entry just takes a void pointer and cleans the necessary
 	 * cache entries, so we can reuse the function for ptes.

From 14cffe44b8aad9a839fcbcf35f9d2bbd90f572c0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 8 Aug 2013 20:34:22 -0700
Subject: [PATCH 062/287] ARM: KVM: Simplify tracepoint text

The tracepoint for kvm_guest_fault was extremely long, make it a
slightly bit shorter.

Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 6e72cc5700fe6b8776d537b736dab64b21ae0f1f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/trace.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index a8e73ed5ad5b..b1d640f78623 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault,
 		__entry->ipa			= ipa;
 	),
 
-	TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
-		  "ipa %#16llx, hsr %#08lx",
-		  __entry->vcpu_pc, __entry->hxfar,
-		  __entry->ipa, __entry->hsr)
+	TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+		  __entry->ipa, __entry->hsr,
+		  __entry->hxfar, __entry->vcpu_pc)
 );
 
 TRACE_EVENT(kvm_irq_line,

From 411b0c990125d54d48314d8ecd6e800bc9660509 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 19 Aug 2013 14:16:57 -0700
Subject: [PATCH 063/287] ARM: KVM: Work around older compiler bug

Compilers before 4.6 do not behave well with unnamed fields in structure
initializers and therefore produces build errors:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10676

By refering to the unnamed union using braces, both older and newer
compilers produce the same result.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reported-by: Russell King <linux@arm.linux.org.uk>
Tested-by: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 6833d83891140aedab7841589b7c7dbd7b600235)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b7840e7aa452..71e08baee209 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -40,7 +40,7 @@ static struct kvm_regs a15_regs_reset = {
 };
 
 static const struct kvm_irq_level a15_vtimer_irq = {
-	.irq = 27,
+	{ .irq = 27 },
 	.level = 1,
 };
 

From 215ed7558daf77d296ee388f64df34b0d479ea07 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 14 Aug 2013 12:33:48 -0700
Subject: [PATCH 064/287] ARM: KVM: Add newlines to panic strings

The panic strings are hard to read and on narrow terminals some
characters are simply truncated off the panic message.

Make is slightly prettier with a newline in the Hyp panic strings.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1fe40f6d39d23f39e643607a3e1883bfc74f1244)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts.S | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index f85052facffc..ddc15539bad2 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -492,10 +492,10 @@ __kvm_hyp_code_end:
 	.section ".rodata"
 
 und_die_str:
-	.ascii	"unexpected undefined exception in Hyp mode at: %#08x"
+	.ascii	"unexpected undefined exception in Hyp mode at: %#08x\n"
 pabt_die_str:
-	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x"
+	.ascii	"unexpected prefetch abort in Hyp mode at: %#08x\n"
 dabt_die_str:
-	.ascii	"unexpected data abort in Hyp mode at: %#08x"
+	.ascii	"unexpected data abort in Hyp mode at: %#08x\n"
 svc_die_str:
-	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x"
+	.ascii	"unexpected HVC/SVC trap in Hyp mode at: %#08x\n"

From 02ef4f0c0d0930d9d44e9929caaee12043447010 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 9 Sep 2013 13:52:33 +0200
Subject: [PATCH 065/287] KVM: mmu: allow page tables to be in read-only slots

Page tables in a read-only memory slot will currently cause a triple
fault because the page walker uses gfn_to_hva and it fails on such a slot.

OVMF uses such a page table; however, real hardware seems to be fine with
that as long as the accessed/dirty bits are set.  Save whether the slot
is readonly, and later check it when updating the accessed and dirty bits.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ba6a3541545542721ce821d1e7e5ce35752e6fdf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/kvm/paging_tmpl.h | 20 +++++++++++++++++++-
 include/linux/kvm_host.h   |  1 +
 virt/kvm/kvm_main.c        | 14 +++++++++-----
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index da20860b457a..e1af2394a23f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -69,6 +69,7 @@ struct guest_walker {
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
 	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+	bool pte_writable[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -130,6 +131,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 		if (pte == orig_pte)
 			continue;
 
+		/*
+		 * If the slot is read-only, simply do not process the accessed
+		 * and dirty bits.  This is the correct thing to do if the slot
+		 * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+		 * are only supported if the accessed and dirty bits are already
+		 * set in the ROM (so that MMIO writes are never needed).
+		 *
+		 * Note that NPT does not allow this at all and faults, since
+		 * it always wants nested page table entries for the guest
+		 * page tables to be writable.  And EPT works but will simply
+		 * overwrite the read-only memory to set the accessed and dirty
+		 * bits.
+		 */
+		if (unlikely(!walker->pte_writable[level - 1]))
+			continue;
+
 		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
 		if (ret)
 			return ret;
@@ -204,7 +221,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 			goto error;
 		real_gfn = gpa_to_gfn(real_gfn);
 
-		host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+		host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+					    &walker->pte_writable[walker->level - 1]);
 		if (unlikely(kvm_is_error_hva(host_addr)))
 			goto error;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 15018c572ac5..e2befc2b1647 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -519,6 +519,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6c9130bd16c8..fcbcf5312f93 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
  */
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	if (writable)
+		*writable = !memslot_is_readonly(slot);
+
 	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }
 
@@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 	int r;
 	unsigned long addr;
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	int offset = offset_in_page(gpa);
 
-	addr = gfn_to_hva_read(kvm, gfn);
+	addr = gfn_to_hva_prot(kvm, gfn, NULL);
 	if (kvm_is_error_hva(addr))
 		return -EFAULT;
 	pagefault_disable();

From ed363014a225b257a32ad69b1ef0da615aecb50a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Wed, 4 Sep 2013 22:32:23 +0200
Subject: [PATCH 066/287] kvm: free resources after canceling async_pf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we cancel 'async_pf_execute()', we should behave as if the work was
never scheduled in 'kvm_setup_async_pf()'.
Fixes a bug when we can't unload module because the vm wasn't destroyed.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 28b441e24088081c1e213139d1303b451a34a4f4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index ea475cd03511..8a39dda7a325 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -101,8 +101,11 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 				   typeof(*work), queue);
 		cancel_work_sync(&work->work);
 		list_del(&work->queue);
-		if (!work->done) /* work was canceled */
+		if (!work->done) { /* work was canceled */
+			mmdrop(work->mm);
+			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
+		}
 	}
 
 	spin_lock(&vcpu->async_pf.lock);

From 78169fda11a0b68310885d8991399bc3a51e5b2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Wed, 4 Sep 2013 22:32:24 +0200
Subject: [PATCH 067/287] kvm: remove .done from struct kvm_async_pf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'.done' is used to mark the completion of 'async_pf_execute()', but
'cancel_work_sync()' returns true when the work was canceled, so we
use it instead.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 98fda169290b3b28c0f2db2b8f02290c13da50ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 1 -
 virt/kvm/async_pf.c      | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e2befc2b1647..dbbd78215204 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -176,7 +176,6 @@ struct kvm_async_pf {
 	unsigned long addr;
 	struct kvm_arch_async_pf arch;
 	struct page *page;
-	bool done;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8a39dda7a325..b197950ac4d5 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -75,7 +75,6 @@ static void async_pf_execute(struct work_struct *work)
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
 	apf->page = page;
-	apf->done = true;
 	spin_unlock(&vcpu->async_pf.lock);
 
 	/*
@@ -99,9 +98,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 		struct kvm_async_pf *work =
 			list_entry(vcpu->async_pf.queue.next,
 				   typeof(*work), queue);
-		cancel_work_sync(&work->work);
 		list_del(&work->queue);
-		if (!work->done) { /* work was canceled */
+		if (cancel_work_sync(&work->work)) {
 			mmdrop(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
@@ -166,7 +164,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 		return 0;
 
 	work->page = NULL;
-	work->done = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
 	work->addr = gfn_to_hva(vcpu->kvm, gfn);

From c3832c083abf356e0df1b581df22ee8a21034841 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 11 Sep 2013 15:27:41 -0700
Subject: [PATCH 068/287] ARM: kvm: rename cpu_reset to avoid name clash

cpu_reset is already #defined in <asm/proc-fns.h> as processor.reset,
so it expands here and causes problems.

Cc: <stable@vger.kernel.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit ac570e0493815e0b41681c89cb50d66421429d27)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index 71e08baee209..c02ba4af599f 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -58,14 +58,14 @@ static const struct kvm_irq_level a15_vtimer_irq = {
  */
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 {
-	struct kvm_regs *cpu_reset;
+	struct kvm_regs *reset_regs;
 	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
 	case KVM_ARM_TARGET_CORTEX_A15:
 		if (vcpu->vcpu_id > a15_max_cpu_idx)
 			return -EINVAL;
-		cpu_reset = &a15_regs_reset;
+		reset_regs = &a15_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
 		cpu_vtimer_irq = &a15_vtimer_irq;
 		break;
@@ -74,7 +74,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	}
 
 	/* Reset core registers */
-	memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+	memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
 
 	/* Reset CP15 registers */
 	kvm_reset_coprocs(vcpu);

From ae0e4b34f8d96028faefddb83994cfd2c8cb8681 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 10 Sep 2013 12:57:17 +0200
Subject: [PATCH 069/287] KVM: cleanup (physical) CPU hotplug

Remove the useless argument, and do not do anything if there are no
VMs running at the time of the hotplug.

Cc: kvm@vger.kernel.org
Cc: gleb@redhat.com
Cc: jan.kiszka@siemens.com
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4fa92fb25ae5a2d79d872ab54df511c831b1f363)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fcbcf5312f93..7103b989ba54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2681,10 +2681,11 @@ static void hardware_enable_nolock(void *junk)
 	}
 }
 
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
 {
 	raw_spin_lock(&kvm_lock);
-	hardware_enable_nolock(junk);
+	if (kvm_usage_count)
+		hardware_enable_nolock(NULL);
 	raw_spin_unlock(&kvm_lock);
 }
 
@@ -2698,10 +2699,11 @@ static void hardware_disable_nolock(void *junk)
 	kvm_arch_hardware_disable(NULL);
 }
 
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
 {
 	raw_spin_lock(&kvm_lock);
-	hardware_disable_nolock(junk);
+	if (kvm_usage_count)
+		hardware_disable_nolock(NULL);
 	raw_spin_unlock(&kvm_lock);
 }
 
@@ -2748,20 +2750,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
 {
 	int cpu = (long)v;
 
-	if (!kvm_usage_count)
-		return NOTIFY_OK;
-
 	val &= ~CPU_TASKS_FROZEN;
 	switch (val) {
 	case CPU_DYING:
 		printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
 		       cpu);
-		hardware_disable(NULL);
+		hardware_disable();
 		break;
 	case CPU_STARTING:
 		printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
 		       cpu);
-		hardware_enable(NULL);
+		hardware_enable();
 		break;
 	}
 	return NOTIFY_OK;

From 753f251708b5322430cf4902264d432a5daf06e4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 10 Sep 2013 12:58:35 +0200
Subject: [PATCH 070/287] KVM: protect kvm_usage_count with its own spinlock

The VM list need not be protected by a raw spinlock.  Separate the
two so that kvm_lock can be made non-raw.

Cc: kvm@vger.kernel.org
Cc: gleb@redhat.com
Cc: jan.kiszka@siemens.com
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4a937f96f3a29c58b7edd349d2e4dfac371efdf2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/locking.txt |  6 +++++-
 virt/kvm/kvm_main.c                   | 19 ++++++++++---------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 41b7ac9884b5..b1f5de22d090 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -135,7 +135,11 @@ Name:		kvm_lock
 Type:		raw_spinlock
 Arch:		any
 Protects:	- vm_list
-		- hardware virtualization enable/disable
+
+Name:		kvm_count_lock
+Type:		raw_spinlock_t
+Arch:		any
+Protects:	- hardware virtualization enable/disable
 Comment:	'raw' because hardware enabling/disabling must be atomic /wrt
 		migration.
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7103b989ba54..6bc0481bf8e5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -71,6 +71,7 @@ MODULE_LICENSE("GPL");
  */
 
 DEFINE_RAW_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
@@ -2683,10 +2684,10 @@ static void hardware_enable_nolock(void *junk)
 
 static void hardware_enable(void)
 {
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 	if (kvm_usage_count)
 		hardware_enable_nolock(NULL);
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_nolock(void *junk)
@@ -2701,10 +2702,10 @@ static void hardware_disable_nolock(void *junk)
 
 static void hardware_disable(void)
 {
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 	if (kvm_usage_count)
 		hardware_disable_nolock(NULL);
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 }
 
 static void hardware_disable_all_nolock(void)
@@ -2718,16 +2719,16 @@ static void hardware_disable_all_nolock(void)
 
 static void hardware_disable_all(void)
 {
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 	hardware_disable_all_nolock();
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 }
 
 static int hardware_enable_all(void)
 {
 	int r = 0;
 
-	raw_spin_lock(&kvm_lock);
+	raw_spin_lock(&kvm_count_lock);
 
 	kvm_usage_count++;
 	if (kvm_usage_count == 1) {
@@ -2740,7 +2741,7 @@ static int hardware_enable_all(void)
 		}
 	}
 
-	raw_spin_unlock(&kvm_lock);
+	raw_spin_unlock(&kvm_count_lock);
 
 	return r;
 }
@@ -3050,7 +3051,7 @@ static int kvm_suspend(void)
 static void kvm_resume(void)
 {
 	if (kvm_usage_count) {
-		WARN_ON(raw_spin_is_locked(&kvm_lock));
+		WARN_ON(raw_spin_is_locked(&kvm_count_lock));
 		hardware_enable_nolock(NULL);
 	}
 }

From 9c904866d4424d18852872878a57a899e48f3838 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 25 Sep 2013 13:53:07 +0200
Subject: [PATCH 071/287] KVM: Convert kvm_lock back to non-raw spinlock

In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"),
the kvm_lock was made a raw lock.  However, the kvm mmu_shrink()
function tries to grab the (non-raw) mmu_lock within the scope of
the raw locked kvm_lock being held.  This leads to the following:

BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0
Preemption disabled at:[<ffffffffa0376eac>] mmu_shrink+0x5c/0x1b0 [kvm]

Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt
Call Trace:
 [<ffffffff8106f2ad>] __might_sleep+0xfd/0x160
 [<ffffffff817d8d64>] rt_spin_lock+0x24/0x50
 [<ffffffffa0376f3c>] mmu_shrink+0xec/0x1b0 [kvm]
 [<ffffffff8111455d>] shrink_slab+0x17d/0x3a0
 [<ffffffff81151f00>] ? mem_cgroup_iter+0x130/0x260
 [<ffffffff8111824a>] balance_pgdat+0x54a/0x730
 [<ffffffff8111fe47>] ? set_pgdat_percpu_threshold+0xa7/0xd0
 [<ffffffff811185bf>] kswapd+0x18f/0x490
 [<ffffffff81070961>] ? get_parent_ip+0x11/0x50
 [<ffffffff81061970>] ? __init_waitqueue_head+0x50/0x50
 [<ffffffff81118430>] ? balance_pgdat+0x730/0x730
 [<ffffffff81060d2b>] kthread+0xdb/0xe0
 [<ffffffff8106e122>] ? finish_task_switch+0x52/0x100
 [<ffffffff817e1e94>] kernel_thread_helper+0x4/0x10
 [<ffffffff81060c50>] ? __init_kthread_worker+0x

After the previous patch, kvm_lock need not be a raw spinlock anymore,
so change it back.

Reported-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: kvm@vger.kernel.org
Cc: gleb@redhat.com
Cc: jan.kiszka@siemens.com
Reviewed-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 2f303b74a62fb74983c0a66e2df353be963c527c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/locking.txt |  2 +-
 arch/x86/kvm/mmu.c                    |  4 ++--
 arch/x86/kvm/x86.c                    |  8 ++++----
 include/linux/kvm_host.h              |  2 +-
 virt/kvm/kvm_main.c                   | 18 +++++++++---------
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index b1f5de22d090..ba035c33d01c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,7 +132,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
 ------------
 
 Name:		kvm_lock
-Type:		raw_spinlock
+Type:		spinlock_t
 Arch:		any
 Protects:	- vm_list
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 004cc87b781c..3c1877bbfe6a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4220,7 +4220,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 	if (nr_to_scan == 0)
 		goto out;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int idx;
@@ -4256,7 +4256,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
 		break;
 	}
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 out:
 	return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 894003d79d22..96765c116c26 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5104,7 +5104,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
 	smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			if (vcpu->cpu != freq->cpu)
@@ -5114,7 +5114,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 				send_ipi = 1;
 		}
 	}
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 	if (freq->old < freq->new && send_ipi) {
 		/*
@@ -5261,12 +5261,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 	struct kvm_vcpu *vcpu;
 	int i;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
 	atomic_set(&kvm_guest_has_master_clock, 0);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 }
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dbbd78215204..97e39fc02020 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -133,7 +133,7 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_io_range {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6bc0481bf8e5..8b47fd241a61 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,7 @@ MODULE_LICENSE("GPL");
  * 		kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
 static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
@@ -491,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err;
 
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 
 	return kvm;
 
@@ -582,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	struct mm_struct *mm = kvm->mm;
 
 	kvm_arch_sync_events(kvm);
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kvm_io_bus_destroy(kvm->buses[i]);
@@ -2974,10 +2974,10 @@ static int vm_stat_get(void *_offset, u64 *val)
 	struct kvm *kvm;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		*val += *(u32 *)((void *)kvm + offset);
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return 0;
 }
 
@@ -2991,12 +2991,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
 	int i;
 
 	*val = 0;
-	raw_spin_lock(&kvm_lock);
+	spin_lock(&kvm_lock);
 	list_for_each_entry(kvm, &vm_list, vm_list)
 		kvm_for_each_vcpu(i, vcpu, kvm)
 			*val += *(u32 *)((void *)vcpu + offset);
 
-	raw_spin_unlock(&kvm_lock);
+	spin_unlock(&kvm_lock);
 	return 0;
 }
 

From 5c1d6aafed853c88a645ac52134e217389c2cc8a Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Wed, 11 Sep 2013 18:34:22 +0530
Subject: [PATCH 072/287] KVM: ARM: Fix typo in comments of inject_abt()

Very minor typo in comments of inject_abt() when we update fault status
register for injecting prefetch abort.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit b373e492f3a3469c615c2ae218d2f723900bf981)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da51..d6c005283678 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
 	*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
 
 	if (is_pabt) {
-		/* Set DFAR and DFSR */
+		/* Set IFAR and IFSR */
 		vcpu->arch.cp15[c6_IFAR] = addr;
 		is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
 		/* Always give debug fault for now - should give guest a clue */

From f166457e087b577af2ee72ec843d43ef6653e0f8 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Mon, 30 Sep 2013 14:20:05 +0530
Subject: [PATCH 073/287] ARM: KVM: Implement kvm_vcpu_preferred_target()
 function

This patch implements kvm_vcpu_preferred_target() function for
KVM ARM which will help us implement KVM_ARM_PREFERRED_TARGET ioctl
for user space.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4a6fee805d5e278e4733bf933cb5b184b7a8be1f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h |  1 +
 arch/arm/kvm/guest.c            | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 7d22517d8071..76f3c1978442 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -154,6 +154,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d03612181..ec98209fda71 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -222,6 +222,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 	return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+	int target = kvm_target_cpu();
+
+	if (target < 0)
+		return -ENODEV;
+
+	memset(init, 0, sizeof(*init));
+
+	/*
+	 * For now, we don't return any features.
+	 * In future, we might use features to return target
+	 * specific features available for the preferred
+	 * target type.
+	 */
+	init->target = (__u32)target;
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;

From 9778336fe11f088c3ddbd12714ab88f5760a1de4 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Mon, 30 Sep 2013 14:20:06 +0530
Subject: [PATCH 074/287] ARM64: KVM: Implement kvm_vcpu_preferred_target()
 function

This patch implements kvm_vcpu_preferred_target() function for
KVM ARM64 which will help us implement KVM_ARM_PREFERRED_TARGET
ioctl for user space.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 473bdc0e6565ebb22455657a40daa21b6b4ee16b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  1 +
 arch/arm64/kvm/guest.c            | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0859a4ddd1e7..4cc8c7078f39 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -151,6 +151,7 @@ struct kvm_vcpu_stat {
 struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 struct kvm_one_reg;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2c3ff67a8ecb..3f0731e53274 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -248,6 +248,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 	return kvm_reset_vcpu(vcpu);
 }
 
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+	int target = kvm_target_cpu();
+
+	if (target < 0)
+		return -ENODEV;
+
+	memset(init, 0, sizeof(*init));
+
+	/*
+	 * For now, we don't return any features.
+	 * In future, we might use features to return target
+	 * specific features available for the preferred
+	 * target type.
+	 */
+	init->target = (__u32)target;
+
+	return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -EINVAL;

From df50e5da2c1c65f1db1e9186112b57a8dd0b41ae Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Mon, 30 Sep 2013 14:20:07 +0530
Subject: [PATCH 075/287] ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET
 ioctl

For implementing CPU=host, we need a mechanism for querying
preferred VCPU target type on underlying Host.

This patch implements KVM_ARM_PREFERRED_TARGET vm ioctl which
returns struct kvm_vcpu_init instance containing information
about preferred VCPU target type and target specific features
available for it.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 42c4e0c77ac91505ab94284b14025e3a0865c0a5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c       | 13 +++++++++++++
 include/uapi/linux/kvm.h |  1 +
 2 files changed, 14 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9c697db2787e..cc5adb9349ef 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -797,6 +797,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			return -EFAULT;
 		return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
 	}
+	case KVM_ARM_PREFERRED_TARGET: {
+		int err;
+		struct kvm_vcpu_init init;
+
+		err = kvm_vcpu_preferred_target(&init);
+		if (err)
+			return err;
+
+		if (copy_to_user(argp, &init, sizeof(init)))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		return -EINVAL;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index acccd08be6c7..cba62019348d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1011,6 +1011,7 @@ struct kvm_s390_ucas_mapping {
 /* VM is being stopped by host */
 #define KVM_KVMCLOCK_CTRL	  _IO(KVMIO,   0xad)
 #define KVM_ARM_VCPU_INIT	  _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST	  _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)

From 89c774beb24924963aa146784a149e47efb29d82 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 1 Oct 2013 19:58:36 +0300
Subject: [PATCH 076/287] Fix NULL dereference in gfn_to_hva_prot()

gfn_to_memslot() can return NULL or invalid slot. We need to check slot
validity before accessing it.

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit a2ac07fe292ea41296049dfdbfeed203e2467ee7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8b47fd241a61..c5bc5aef11f5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1065,10 +1065,12 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
 	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
-	if (writable)
+	unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+	if (!kvm_is_error_hva(hva) && writable)
 		*writable = !memslot_is_readonly(slot);
 
-	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+	return hva;
 }
 
 static int kvm_read_hva(void *data, void __user *hva, int len)

From 708854902d50f40783a81a629c4131383249236d Mon Sep 17 00:00:00 2001
From: Andre Richter <andre.o.richter@gmail.com>
Date: Wed, 2 Oct 2013 12:23:26 +0200
Subject: [PATCH 077/287] virt/kvm/iommu.c: Add leading zeros to device's BDF
 notation in debug messages

When KVM (de)assigns PCI(e) devices to VMs, a debug message is printed
including the BDF notation of the respective device. Currently, the BDF
notation does not have the commonly used leading zeros. This produces
messages like "assign device 0:1:8.0", which look strange at first sight.

The patch fixes this by exchanging the printk(KERN_DEBUG ...) with dev_info()
and also inserts "kvm" into the debug message, so that it is obvious where
the message comes from. Also reduces LoC.

Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Andre Richter <andre.o.richter@gmail.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 29242cb5c63b1f8e12e8055ba1a6c3e0004fa86d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/iommu.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 72a130bc448a..a3b14109049b 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -190,11 +190,7 @@ int kvm_assign_device(struct kvm *kvm,
 
 	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 
-	printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
-		assigned_dev->host_segnr,
-		assigned_dev->host_busnr,
-		PCI_SLOT(assigned_dev->host_devfn),
-		PCI_FUNC(assigned_dev->host_devfn));
+	dev_info(&pdev->dev, "kvm assign device\n");
 
 	return 0;
 out_unmap:
@@ -220,11 +216,7 @@ int kvm_deassign_device(struct kvm *kvm,
 
 	pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 
-	printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
-		assigned_dev->host_segnr,
-		assigned_dev->host_busnr,
-		PCI_SLOT(assigned_dev->host_devfn),
-		PCI_FUNC(assigned_dev->host_devfn));
+	dev_info(&pdev->dev, "kvm deassign device\n");
 
 	return 0;
 }

From c1b378c34e3f2856c0ce060a8dad0877c6d6bfb9 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 26 Sep 2013 16:49:27 +0100
Subject: [PATCH 078/287] KVM: ARM: Fix calculation of virtual CPU ID

KVM does not have a notion of multiple clusters for CPUs, just a linear
array of CPUs. When using a system with cores in more than one cluster, the
current method for calculating the virtual MPIDR will leak the (physical)
cluster information into the virtual MPIDR. One effect of this is that
Linux under KVM fails to boot multiple CPUs that aren't in the 0th cluster.

This patch does away with exposing the real MPIDR fields in favour of simply
using the virtual CPU number (but preserving the U bit, as before).

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1158fca401e09665c440a9fe4fd4f131ee85c13b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc_a15.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index cf93472b9dd6..bbd4b888dbf3 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -27,14 +27,11 @@
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 {
 	/*
-	 * Compute guest MPIDR:
-	 * (Even if we present only one VCPU to the guest on an SMP
-	 * host we don't set the U bit in the MPIDR, or vice versa, as
-	 * revealing the underlying hardware properties is likely to
-	 * be the best choice).
+	 * Compute guest MPIDR. No need to mess around with different clusters
+	 * but we read the 'U' bit from the underlying hardware directly.
 	 */
-	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
-		| (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
+	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
+					| vcpu->vcpu_id;
 }
 
 #include "coproc.h"

From e82866a69bd6e3ed1a1405ba19f1d5e66e5129a2 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 26 Sep 2013 16:49:26 +0100
Subject: [PATCH 079/287] KVM: ARM: fix the size of TTBCR_{T0SZ,T1SZ} masks

The T{0,1}SZ fields of TTBCR are 3 bits wide when using the long descriptor
format. Likewise, the T0SZ field of the HTCR is 3-bits. KVM currently
defines TTBCR_T{0,1}SZ as 3, not 7.

The T0SZ mask is used to calculate the value for the HTCR, both to pick out
TTBCR.T0SZ and mask off the equivalent field in the HTCR during
read-modify-write. The incorrect mask size causes the (UNKNOWN) reset value
of HTCR.T0SZ to leak in to the calculated HTCR value. Linux will hang when
initializing KVM if HTCR's reset value has bit 2 set (sometimes the case on
A7/TC2)

Fixing T0SZ allows A7 cores to boot and T1SZ is also fixed for completeness.

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 5e497046f005528464f9600a4ee04f49df713596)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 64e96960de29..d556f03bca17 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -95,12 +95,12 @@
 #define TTBCR_IRGN1	(3 << 24)
 #define TTBCR_EPD1	(1 << 23)
 #define TTBCR_A1	(1 << 22)
-#define TTBCR_T1SZ	(3 << 16)
+#define TTBCR_T1SZ	(7 << 16)
 #define TTBCR_SH0	(3 << 12)
 #define TTBCR_ORGN0	(3 << 10)
 #define TTBCR_IRGN0	(3 << 8)
 #define TTBCR_EPD0	(1 << 7)
-#define TTBCR_T0SZ	3
+#define TTBCR_T0SZ	(7 << 0)
 #define HTCR_MASK	(TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
 
 /* Hyp System Trap Register */

From 5e1ddf60b6830d405ebb026e3d9570e9236a6600 Mon Sep 17 00:00:00 2001
From: Jonathan Austin <jonathan.austin@arm.com>
Date: Thu, 26 Sep 2013 16:49:28 +0100
Subject: [PATCH 080/287] KVM: ARM: Add support for Cortex-A7

This patch adds support for running Cortex-A7 guests on Cortex-A7 hosts.

As Cortex-A7 is architecturally compatible with A15, this patch is largely just
generalising existing code. Areas where 'implementation defined' behaviour
is identical for A7 and A15 is moved to allow it to be used by both cores.

The check to ensure that coprocessor register tables are sorted correctly is
also moved in to 'common' code to avoid each new cpu doing its own check
(and possibly forgetting to do so!)

Signed-off-by: Jonathan Austin <jonathan.austin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e8c2d99f8277d68d28a9f99d16289712bc2aee7f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h  |   2 +-
 arch/arm/include/uapi/asm/kvm.h |   3 +-
 arch/arm/kvm/Makefile           |   2 +-
 arch/arm/kvm/coproc.c           | 114 ++++++++++++++++++++++++++++++++
 arch/arm/kvm/coproc_a15.c       | 114 +-------------------------------
 arch/arm/kvm/coproc_a7.c        |  54 +++++++++++++++
 arch/arm/kvm/guest.c            |   4 +-
 arch/arm/kvm/reset.c            |  15 +++--
 8 files changed, 184 insertions(+), 124 deletions(-)
 create mode 100644 arch/arm/kvm/coproc_a7.c

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index a2f43ddcc300..661da11f76f4 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -39,7 +39,7 @@
 #define c6_IFAR		17	/* Instruction Fault Address Register */
 #define c7_PAR		18	/* Physical Address Register */
 #define c7_PAR_high	19	/* PAR top 32 bits */
-#define c9_L2CTLR	20	/* Cortex A15 L2 Control Register */
+#define c9_L2CTLR	20	/* Cortex A15/A7 L2 Control Register */
 #define c10_PRRR	21	/* Primary Region Remap Register */
 #define c10_NMRR	22	/* Normal Memory Remap Register */
 #define c12_VBAR	23	/* Vector Base Address Register */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007523d7..c498b60c0505 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -63,7 +63,8 @@ struct kvm_regs {
 
 /* Supported Processor Types */
 #define KVM_ARM_TARGET_CORTEX_A15	0
-#define KVM_ARM_NUM_TARGETS		1
+#define KVM_ARM_TARGET_CORTEX_A7	1
+#define KVM_ARM_NUM_TARGETS		2
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index d99bee4950e5..789bca9e64a7 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -19,6 +19,6 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index db9cf692d4dd..a629f2c1d0f9 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -71,6 +71,92 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	/*
+	 * Compute guest MPIDR. No need to mess around with different clusters
+	 * but we read the 'U' bit from the underlying hardware directly.
+	 */
+	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
+					| vcpu->vcpu_id;
+}
+
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+			 const struct coproc_params *p,
+			 const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+	return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+			const struct coproc_params *p,
+			const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p);
+	return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+	return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 l2ctlr, ncores;
+
+	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+	l2ctlr &= ~(3 << 24);
+	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+	l2ctlr |= (ncores & 3) << 24;
+
+	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+	u32 actlr;
+
+	/* ACTLR contains SMP bit: make sure you create all cpus first! */
+	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+	/* Make the SMP bit consistent with the guest configuration */
+	if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+		actlr |= 1U << 6;
+	else
+		actlr &= ~(1U << 6);
+
+	vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+			   const struct coproc_params *p,
+			   const struct coproc_reg *r)
+{
+	if (p->is_write)
+		return ignore_write(vcpu, p);
+
+	*vcpu_reg(vcpu, p->Rt1) = 0;
+	return true;
+}
+
 /* See note at ARM ARM B1.14.4 */
 static bool access_dcsw(struct kvm_vcpu *vcpu,
 			const struct coproc_params *p,
@@ -153,10 +239,22 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
+	/* MPIDR: we use VMPIDR for guest access. */
+	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+			NULL, reset_mpidr, c0_MPIDR },
+
 	/* CSSELR: swapped by interrupt.S. */
 	{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
 			NULL, reset_unknown, c0_CSSELR },
 
+	/* ACTLR: trapped by HCR.TAC bit. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_actlr, reset_actlr, c1_ACTLR },
+
+	/* CPACR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+			NULL, reset_val, c1_CPACR, 0x00000000 },
+
 	/* TTBR0/TTBR1: swapped by interrupt.S. */
 	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
 	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
@@ -194,6 +292,13 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, access_dcsw},
 	{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
 	{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
+	/*
+	 * L2CTLR access (guest wants to know #CPUs).
+	 */
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+			access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
 	/*
 	 * Dummy performance monitor implementation.
 	 */
@@ -234,6 +339,9 @@ static const struct coproc_reg cp15_regs[] = {
 	/* CNTKCTL: swapped by interrupt.S. */
 	{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+	/* The Configuration Base Address Register. */
+	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 /* Target specific emulation tables */
@@ -241,6 +349,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
 
 void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
 {
+	unsigned int i;
+
+	for (i = 1; i < table->num; i++)
+		BUG_ON(cmp_reg(&table->table[i-1],
+			       &table->table[i]) >= 0);
+
 	target_tables[table->target] = table;
 }
 
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index bbd4b888dbf3..bb0cac1410cc 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,98 +17,12 @@
  * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 #include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
 #include <linux/init.h>
 
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	/*
-	 * Compute guest MPIDR. No need to mess around with different clusters
-	 * but we read the 'U' bit from the underlying hardware directly.
-	 */
-	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
-					| vcpu->vcpu_id;
-}
-
 #include "coproc.h"
 
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
-			 const struct coproc_params *p,
-			 const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
-	return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
-			const struct coproc_params *p,
-			const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return write_to_read_only(vcpu, p);
-	return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
-			  const struct coproc_params *p,
-			  const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
-	return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	u32 l2ctlr, ncores;
-
-	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
-	l2ctlr &= ~(3 << 24);
-	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
-	l2ctlr |= (ncores & 3) << 24;
-
-	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
-	u32 actlr;
-
-	/* ACTLR contains SMP bit: make sure you create all cpus first! */
-	asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
-	/* Make the SMP bit consistent with the guest configuration */
-	if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
-		actlr |= 1U << 6;
-	else
-		actlr &= ~(1U << 6);
-
-	vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
-			   const struct coproc_params *p,
-			   const struct coproc_reg *r)
-{
-	if (p->is_write)
-		return ignore_write(vcpu, p);
-
-	*vcpu_reg(vcpu, p->Rt1) = 0;
-	return true;
-}
-
 /*
  * A15-specific CP15 registers.
  * CRn denotes the primary register number, but is copied to the CRm in the
@@ -118,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
  *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
-	/* MPIDR: we use VMPIDR for guest access. */
-	{ CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
-			NULL, reset_mpidr, c0_MPIDR },
-
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c1_SCTLR, 0x00C50078 },
-	/* ACTLR: trapped by HCR.TAC bit. */
-	{ CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
-			access_actlr, reset_actlr, c1_ACTLR },
-	/* CPACR: swapped by interrupt.S. */
-	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_val, c1_CPACR, 0x00000000 },
-
-	/*
-	 * L2CTLR access (guest wants to know #CPUs).
-	 */
-	{ CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
-			access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
-	{ CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
-	/* The Configuration Base Address Register. */
-	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
@@ -151,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
 
 static int __init coproc_a15_init(void)
 {
-	unsigned int i;
-
-	for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
-		BUG_ON(cmp_reg(&a15_regs[i-1],
-			       &a15_regs[i]) >= 0);
-
 	kvm_register_target_coproc_table(&a15_target_table);
 	return 0;
 }
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 000000000000..1df767331588
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.au>
+ *          Christoffer Dall <c.dall@virtualopensystems.com>
+ *          Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+	/* SCTLR: swapped by interrupt.S. */
+	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+			NULL, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+	.target = KVM_ARM_TARGET_CORTEX_A7,
+	.table = a7_regs,
+	.num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+	kvm_register_target_coproc_table(&a7_target_table);
+	return 0;
+}
+late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index ec98209fda71..20f8d97904af 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -190,6 +190,8 @@ int __attribute_const__ kvm_target_cpu(void)
 		return -EINVAL;
 
 	switch (part_number) {
+	case ARM_CPU_PART_CORTEX_A7:
+		return KVM_ARM_TARGET_CORTEX_A7;
 	case ARM_CPU_PART_CORTEX_A15:
 		return KVM_ARM_TARGET_CORTEX_A15;
 	default:
@@ -202,7 +204,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 {
 	unsigned int i;
 
-	/* We can only do a cortex A15 for now. */
+	/* We can only cope with guest==host and only on A15/A7 (for now). */
 	if (init->target != kvm_target_cpu())
 		return -EINVAL;
 
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index c02ba4af599f..d153e64d1255 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -30,16 +30,16 @@
 #include <kvm/arm_arch_timer.h>
 
 /******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int a15_max_cpu_idx = 3;
+static const int cortexa_max_cpu_idx = 3;
 
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
 
-static const struct kvm_irq_level a15_vtimer_irq = {
+static const struct kvm_irq_level cortexa_vtimer_irq = {
 	{ .irq = 27 },
 	.level = 1,
 };
@@ -62,12 +62,13 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	const struct kvm_irq_level *cpu_vtimer_irq;
 
 	switch (vcpu->arch.target) {
+	case KVM_ARM_TARGET_CORTEX_A7:
 	case KVM_ARM_TARGET_CORTEX_A15:
-		if (vcpu->vcpu_id > a15_max_cpu_idx)
+		if (vcpu->vcpu_id > cortexa_max_cpu_idx)
 			return -EINVAL;
-		reset_regs = &a15_regs_reset;
+		reset_regs = &cortexa_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
-		cpu_vtimer_irq = &a15_vtimer_irq;
+		cpu_vtimer_irq = &cortexa_vtimer_irq;
 		break;
 	default:
 		return -ENODEV;

From c7a9a5f3f02ff35d7ee820e98c1b9d46ab425808 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:28 -0700
Subject: [PATCH 081/287] KVM: Move gfn_to_index to x86 specific code

The gfn_to_index function relies on huge page defines which either may
not make sense on systems that don't support huge pages or are defined
in an unconvenient way for other architectures.  Since this is
x86-specific, move the function to arch/x86/include/asm/kvm_host.h.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 6d9d41e57440e32a3400f37aa05ef7a1a09ced64)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/include/asm/kvm_host.h | 7 +++++++
 include/linux/kvm_host.h        | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3741c653767c..53db582487c9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 97e39fc02020..b5267c4f5392 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -827,13 +827,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
 	return gfn_to_memslot(kvm, gfn)->id;
 }
 
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
-	/* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
-	return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
-		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
 static inline gfn_t
 hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 {

From 4b0745341e63f3878fcc1240d44d1ddcab068ce1 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:29 -0700
Subject: [PATCH 082/287] KVM: ARM: Get rid of KVM_HPAGE defines

The KVM_HPAGE_DEFINES are a little artificial on ARM, since the huge
page size is statically defined at compile time and there is only a
single huge page size.

Now when the main kvm code relying on these defines has been moved to
the x86 specific part of the world, we can get rid of these.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit dc6f6763dfeaf2dfec906bb78875dcea162accd9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 76f3c1978442..8a6f6db14ee4 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -38,11 +38,6 @@
 
 #define KVM_VCPU_MAX_FEATURES 1
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
-
 #include <kvm/arm_vgic.h>
 
 struct kvm_vcpu;

From 1feff9299a9b7b1f137166f062772cd2044c7cbb Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 14:22:30 -0700
Subject: [PATCH 083/287] KVM: arm64: Get rid of KVM_HPAGE defines

Now when the main kvm code relying on these defines has been moved to
the x86 specific part of the world, we can get rid of these.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit ef0cfe71c2b1710cd4ae747537e36c56f9a26ccf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4cc8c7078f39..5d85a02d1231 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -36,11 +36,6 @@
 
 #define KVM_VCPU_MAX_FEATURES 2
 
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x)	0
-#define KVM_NR_PAGE_SIZES	1
-#define KVM_PAGES_PER_HPAGE(x)	(1UL<<31)
-
 struct kvm_vcpu;
 int kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);

From 0a2e6af7befb4c0336f2c789d172bbb5438aec72 Mon Sep 17 00:00:00 2001
From: chai wen <chaiw.fnst@cn.fujitsu.com>
Date: Mon, 14 Oct 2013 22:22:33 +0800
Subject: [PATCH 084/287] KVM: Drop FOLL_GET in GUP when doing async page fault

Page pinning is not mandatory in kvm async page fault processing since
after async page fault event is delivered to a guest it accesses page once
again and does its own GUP.  Drop the FOLL_GET flag in GUP in async_pf
code, and do some simplifying in check/clear processing.

Suggested-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Gu zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: chai wen <chaiw.fnst@cn.fujitsu.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit f2e106692d5189303997ad7b96de8d8123aa5613)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/kvm/x86.c         |  4 ++--
 include/linux/kvm_host.h   |  2 +-
 include/trace/events/kvm.h | 10 ++++------
 virt/kvm/async_pf.c        | 17 +++++------------
 4 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 96765c116c26..e4985fc604fe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7118,7 +7118,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	int r;
 
 	if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
-	      is_error_page(work->page))
+	      work->wakeup_all)
 		return;
 
 	r = kvm_mmu_reload(vcpu);
@@ -7228,7 +7228,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 	struct x86_exception fault;
 
 	trace_kvm_async_pf_ready(work->arch.token, work->gva);
-	if (is_error_page(work->page))
+	if (work->wakeup_all)
 		work->arch.token = ~0; /* broadcast wakeup */
 	else
 		kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5267c4f5392..384fb0a74924 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -175,7 +175,7 @@ struct kvm_async_pf {
 	gva_t gva;
 	unsigned long addr;
 	struct kvm_arch_async_pf arch;
-	struct page *page;
+	bool   wakeup_all;
 };
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7005d1109ec9..131a0bda7aec 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
 
 TRACE_EVENT(
 	kvm_async_pf_completed,
-	TP_PROTO(unsigned long address, struct page *page, u64 gva),
-	TP_ARGS(address, page, gva),
+	TP_PROTO(unsigned long address, u64 gva),
+	TP_ARGS(address, gva),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, address)
-		__field(pfn_t, pfn)
 		__field(u64, gva)
 		),
 
 	TP_fast_assign(
 		__entry->address = address;
-		__entry->pfn = page ? page_to_pfn(page) : 0;
 		__entry->gva = gva;
 		),
 
-	TP_printk("gva %#llx address %#lx pfn %#llx",  __entry->gva,
-		  __entry->address, __entry->pfn)
+	TP_printk("gva %#llx address %#lx",  __entry->gva,
+		  __entry->address)
 );
 
 #endif
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index b197950ac4d5..8631d9c14320 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void async_pf_execute(struct work_struct *work)
 {
-	struct page *page = NULL;
 	struct kvm_async_pf *apf =
 		container_of(work, struct kvm_async_pf, work);
 	struct mm_struct *mm = apf->mm;
@@ -68,13 +67,12 @@ static void async_pf_execute(struct work_struct *work)
 
 	use_mm(mm);
 	down_read(&mm->mmap_sem);
-	get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);
-	apf->page = page;
 	spin_unlock(&vcpu->async_pf.lock);
 
 	/*
@@ -82,7 +80,7 @@ static void async_pf_execute(struct work_struct *work)
 	 * this point
 	 */
 
-	trace_kvm_async_pf_completed(addr, page, gva);
+	trace_kvm_async_pf_completed(addr, gva);
 
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
@@ -112,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.done.next,
 				   typeof(*work), link);
 		list_del(&work->link);
-		if (!is_error_page(work->page))
-			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
 	spin_unlock(&vcpu->async_pf.lock);
@@ -133,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		list_del(&work->link);
 		spin_unlock(&vcpu->async_pf.lock);
 
-		if (work->page)
-			kvm_arch_async_page_ready(vcpu, work);
+		kvm_arch_async_page_ready(vcpu, work);
 		kvm_arch_async_page_present(vcpu, work);
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
-		if (!is_error_page(work->page))
-			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
 }
@@ -163,7 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	if (!work)
 		return 0;
 
-	work->page = NULL;
+	work->wakeup_all = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
 	work->addr = gfn_to_hva(vcpu->kvm, gfn);
@@ -203,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
 	if (!work)
 		return -ENOMEM;
 
-	work->page = KVM_ERR_PTR_BAD_PAGE;
+	work->wakeup_all = true;
 	INIT_LIST_HEAD(&work->queue); /* for list_del to work */
 
 	spin_lock(&vcpu->async_pf.lock);

From 841372680792857d33c4c49d21be8a3cbde0490b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 22:18:00 +0530
Subject: [PATCH 085/287] kvm: Add struct kvm arg to memslot APIs

We will use that in the later patch to find the kvm ops handler

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
(cherry picked from commit 5587027ce9d59a57aecaa190be1c8e560aaff45d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c                 |  5 +++--
 arch/ia64/kvm/kvm-ia64.c           |  5 +++--
 arch/mips/kvm/kvm_mips.c           |  5 +++--
 arch/powerpc/include/asm/kvm_ppc.h |  6 ++++--
 arch/powerpc/kvm/booke.c           |  4 ++--
 arch/powerpc/kvm/powerpc.c         |  9 +++++----
 arch/s390/kvm/kvm-s390.c           |  5 +++--
 arch/x86/kvm/x86.c                 |  5 +++--
 include/linux/kvm_host.h           |  5 +++--
 virt/kvm/kvm_main.c                | 12 ++++++------
 10 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index cc5adb9349ef..e312e4a53f8d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bdfd8789b376..985bf80c622e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index a7b044536de4..73b34827826c 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 	return -ENOIOCTLCMD;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe03d77..e2dd05c81bc6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -143,9 +143,11 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
 extern void kvm_release_hpt(struct kvmppc_linear_info *li);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+				     struct kvm_memory_slot *free,
 				     struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1a1b51189773..0a91f47e264b 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1592,12 +1592,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return -ENOTSUPP;
 }
 
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			      struct kvm_memory_slot *dont)
 {
 }
 
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			       unsigned long npages)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ae63ae4a1a5f..750835a4ef70 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -409,15 +409,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
-	kvmppc_core_free_memslot(free, dont);
+	kvmppc_core_free_memslot(kvm, free, dont);
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
-	return kvmppc_core_create_memslot(slot, npages);
+	return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
 void kvm_arch_memslots_updated(struct kvm *kvm)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e515b2d4a947..54612d0e79dd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -971,12 +971,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e4985fc604fe..68b139fe0dbd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6897,7 +6897,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
 	int i;
@@ -6918,7 +6918,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 	}
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
 {
 	int i;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 384fb0a74924..4de0a8fedf3f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -493,9 +493,10 @@ int kvm_set_memory_region(struct kvm *kvm,
 			  struct kvm_userspace_memory_region *mem);
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages);
 void kvm_arch_memslots_updated(struct kvm *kvm);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c5bc5aef11f5..c777a6e582f0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -541,13 +541,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 /*
  * Free any memory in @free but not in @dont.
  */
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
-	kvm_arch_free_memslot(free, dont);
+	kvm_arch_free_memslot(kvm, free, dont);
 
 	free->npages = 0;
 }
@@ -558,7 +558,7 @@ void kvm_free_physmem(struct kvm *kvm)
 	struct kvm_memory_slot *memslot;
 
 	kvm_for_each_memslot(memslot, slots)
-		kvm_free_physmem_slot(memslot, NULL);
+		kvm_free_physmem_slot(kvm, memslot, NULL);
 
 	kfree(kvm->memslots);
 }
@@ -822,7 +822,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (change == KVM_MR_CREATE) {
 		new.userspace_addr = mem->userspace_addr;
 
-		if (kvm_arch_create_memslot(&new, npages))
+		if (kvm_arch_create_memslot(kvm, &new, npages))
 			goto out_free;
 	}
 
@@ -898,7 +898,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
-	kvm_free_physmem_slot(&old, &new);
+	kvm_free_physmem_slot(kvm, &old, &new);
 	kfree(old_memslots);
 
 	return 0;
@@ -906,7 +906,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 out_slots:
 	kfree(slots);
 out_free:
-	kvm_free_physmem_slot(&new, &old);
+	kvm_free_physmem_slot(kvm, &new, &old);
 out:
 	return r;
 }

From 46a037fd36656b8283c282319876dd8e9c4f1087 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Oct 2013 18:38:13 +0100
Subject: [PATCH 086/287] ARM: KVM: Yield CPU when vcpu executes a WFE

On an (even slightly) oversubscribed system, spinlocks are quickly
becoming a bottleneck, as some vcpus are spinning, waiting for a
lock to be released, while the vcpu holding the lock may not be
running at all.

This creates contention, and the observed slowdown is 40x for
hackbench. No, this isn't a typo.

The solution is to trap blocking WFEs and tell KVM that we're
now spinning. This ensures that other vpus will get a scheduling
boost, allowing the lock to be released more quickly. Also, using
CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance
when the VM is severely overcommited.

Quick test to estimate the performance: hackbench 1 process 1000

2xA15 host (baseline):	1.843s

2xA15 guest w/o patch:	2.083s
4xA15 guest w/o patch:	80.212s
8xA15 guest w/o patch:	Could not be bothered to find out

2xA15 guest w/ patch:	2.102s
4xA15 guest w/ patch:	3.205s
8xA15 guest w/ patch:	6.887s

So we go from a 40x degradation to 1.5x in the 2x overcommit case,
which is vaguely more acceptable.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 58d5ec8f8ee318b26b29207874fbaee626973952)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h | 4 +++-
 arch/arm/kvm/Kconfig           | 1 +
 arch/arm/kvm/handle_exit.c     | 6 +++++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index d556f03bca17..fe395b7b1ce2 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -67,7 +67,7 @@
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
-			HCR_SWIO | HCR_TIDCP)
+			HCR_TWE | HCR_SWIO | HCR_TIDCP)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 /* System Control Register (SCTLR) bits */
@@ -208,6 +208,8 @@
 #define HSR_EC_DABT	(0x24)
 #define HSR_EC_DABT_HYP	(0x25)
 
+#define HSR_WFI_IS_WFE		(1U << 0)
+
 #define HSR_HVC_IMM_MASK	((1UL << 16) - 1)
 
 #define HSR_DABT_S1PTW		(1U << 7)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index ebf5015508b5..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	depends on ARM_VIRT_EXT && ARM_LPAE
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index df4c82d47ad7..c4c496f7619c 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -84,7 +84,11 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	trace_kvm_wfi(*vcpu_pc(vcpu));
-	kvm_vcpu_block(vcpu);
+	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+		kvm_vcpu_on_spin(vcpu);
+	else
+		kvm_vcpu_block(vcpu);
+
 	return 1;
 }
 

From 1391c263d202d6ce8b74f361cd3607c69ae26e77 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 15 Oct 2013 18:10:42 -0700
Subject: [PATCH 087/287] KVM: ARM: Update comments for kvm_handle_wfi

Update comments to reflect what is really going on and add the TWE bit
to the comments in kvm_arm.h.

Also renames the function to kvm_handle_wfx like is done on arm64 for
consistency and uber-correctness.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 86ed81aa2e1ce05a4e7f0819f0dfc34e8d8fb910)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h |  1 +
 arch/arm/kvm/handle_exit.c     | 14 ++++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index fe395b7b1ce2..1d3153c7eb41 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -57,6 +57,7 @@
  * TSC:		Trap SMC
  * TSW:		Trap cache operations by set/way
  * TWI:		Trap WFI
+ * TWE:		Trap WFE
  * TIDCP:	Trap L2CTLR/L2ECTLR
  * BSU_IS:	Upgrade barriers to the inner shareable domain
  * FB:		Force broadcast of all maintainance operations
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index c4c496f7619c..a92079011a83 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -73,15 +73,17 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 /**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests
  * @vcpu:	the vcpu pointer
  * @run:	the kvm_run structure pointer
  *
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
  */
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	trace_kvm_wfi(*vcpu_pc(vcpu));
 	if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
@@ -93,7 +95,7 @@ static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 static exit_handle_fn arm_exit_handlers[] = {
-	[HSR_EC_WFI]		= kvm_handle_wfi,
+	[HSR_EC_WFI]		= kvm_handle_wfx,
 	[HSR_EC_CP15_32]	= kvm_handle_cp15_32,
 	[HSR_EC_CP15_64]	= kvm_handle_cp15_64,
 	[HSR_EC_CP14_MR]	= kvm_handle_cp14_access,

From cd709bb9fb35e687107761d5feed331d4c2c48e5 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 1 Nov 2012 17:14:45 +0100
Subject: [PATCH 088/287] KVM: ARM: Support hugetlbfs backed huge pages

Support huge pages in KVM/ARM and KVM/ARM64.  The pud_huge checking on
the unmap path may feel a bit silly as the pud_huge check is always
defined to false, but the compiler should be smart about this.

Note: This deals only with VMAs marked as huge which are allocated by
users through hugetlbfs only.  Transparent huge pages can only be
detected by looking at the underlying pages (or the page tables
themselves) and this patch so far simply maps these on a page-by-page
level in the Stage-2 page tables.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit ad361f093c1e31d0b43946210a32ab4ff5c49850)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h         |  17 ++-
 arch/arm/include/asm/pgtable-3level.h  |   2 +
 arch/arm/kvm/mmu.c                     | 169 +++++++++++++++++++------
 arch/arm64/include/asm/kvm_mmu.h       |  12 +-
 arch/arm64/include/asm/pgtable-hwdef.h |   2 +
 5 files changed, 158 insertions(+), 44 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 9b28c41f4ba9..77de4a41cc50 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -62,6 +62,12 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
+static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
+{
+	*pmd = new_pmd;
+	flush_pmd_entry(pmd);
+}
+
 static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 {
 	*pte = new_pte;
@@ -103,9 +109,15 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
 	pte_val(*pte) |= L_PTE_S2_RDWR;
 }
 
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+	pmd_val(*pmd) |= L_PMD_S2_RDWR;
+}
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+					      unsigned long size)
 {
 	/*
 	 * If we are going to insert an instruction page and the icache is
@@ -120,8 +132,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
 	 * need any kind of flushing (DDI 0406C.b - Page B3-1392).
 	 */
 	if (icache_is_pipt()) {
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		__cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+		__cpuc_coherent_user_range(hva, hva + size);
 	} else if (!icache_is_vivt_asid_tagged()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 86b8fe398b95..ad52938bb264 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -113,6 +113,8 @@
 #define L_PTE_S2_RDONLY		 (_AT(pteval_t, 1) << 6)   /* HAP[1]   */
 #define L_PTE_S2_RDWR		 (_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define L_PMD_S2_RDWR		 (_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
 /*
  * Hyp-mode PL2 PTE definitions for LPAE.
  */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b0de86b56c13..745d8b1630cc 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
 #include <linux/io.h>
+#include <linux/hugetlb.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
@@ -41,6 +42,8 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define kvm_pmd_huge(_x)	(pmd_huge(_x))
+
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
 	/*
@@ -93,19 +96,29 @@ static bool page_empty(void *ptr)
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	pud_clear(pud);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pmd_free(NULL, pmd_table);
+	if (pud_huge(*pud)) {
+		pud_clear(pud);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	} else {
+		pmd_t *pmd_table = pmd_offset(pud, 0);
+		pud_clear(pud);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		pmd_free(NULL, pmd_table);
+	}
 	put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	pmd_clear(pmd);
-	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pte_free_kernel(NULL, pte_table);
+	if (kvm_pmd_huge(*pmd)) {
+		pmd_clear(pmd);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	} else {
+		pte_t *pte_table = pte_offset_kernel(pmd, 0);
+		pmd_clear(pmd);
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+		pte_free_kernel(NULL, pte_table);
+	}
 	put_page(virt_to_page(pmd));
 }
 
@@ -136,18 +149,32 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			continue;
 		}
 
+		if (pud_huge(*pud)) {
+			/*
+			 * If we are dealing with a huge pud, just clear it and
+			 * move on.
+			 */
+			clear_pud_entry(kvm, pud, addr);
+			addr = pud_addr_end(addr, end);
+			continue;
+		}
+
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
 			addr = pmd_addr_end(addr, end);
 			continue;
 		}
 
-		pte = pte_offset_kernel(pmd, addr);
-		clear_pte_entry(kvm, pte, addr);
-		next = addr + PAGE_SIZE;
+		if (!kvm_pmd_huge(*pmd)) {
+			pte = pte_offset_kernel(pmd, addr);
+			clear_pte_entry(kvm, pte, addr);
+			next = addr + PAGE_SIZE;
+		}
 
-		/* If we emptied the pte, walk back up the ladder */
-		if (page_empty(pte)) {
+		/*
+		 * If the pmd entry is to be cleared, walk back up the ladder
+		 */
+		if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
 			next = pmd_addr_end(addr, end);
 			if (page_empty(pmd) && !page_empty(pud)) {
@@ -420,29 +447,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }
 
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
-			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			     phys_addr_t addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, old_pte;
 
-	/* Create 2nd stage page table mapping - Level 1 */
 	pgd = kvm->arch.pgd + pgd_index(addr);
 	pud = pud_offset(pgd, addr);
 	if (pud_none(*pud)) {
 		if (!cache)
-			return 0; /* ignore calls from kvm_set_spte_hva */
+			return NULL;
 		pmd = mmu_memory_cache_alloc(cache);
 		pud_populate(NULL, pud, pmd);
 		get_page(virt_to_page(pud));
 	}
 
-	pmd = pmd_offset(pud, addr);
+	return pmd_offset(pud, addr);
+}
 
-	/* Create 2nd stage page table mapping - Level 2 */
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+			       *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+	pmd_t *pmd, old_pmd;
+
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	VM_BUG_ON(!pmd);
+
+	/*
+	 * Mapping in huge pages should only happen through a fault.  If a
+	 * page is merged into a transparent huge page, the individual
+	 * subpages of that huge page should be unmapped through MMU
+	 * notifiers before we get here.
+	 *
+	 * Merging of CompoundPages is not supported; they should become
+	 * splitting first, unmapped, merged, and mapped back in on-demand.
+	 */
+	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+	old_pmd = *pmd;
+	kvm_set_pmd(pmd, *new_pmd);
+	if (pmd_present(old_pmd))
+		kvm_tlb_flush_vmid_ipa(kvm, addr);
+	else
+		get_page(virt_to_page(pmd));
+	return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+			  phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+	pmd_t *pmd;
+	pte_t *pte, old_pte;
+
+	/* Create stage-2 page table mapping - Level 1 */
+	pmd = stage2_get_pmd(kvm, cache, addr);
+	if (!pmd) {
+		/*
+		 * Ignore calls from kvm_set_spte_hva for unallocated
+		 * address ranges.
+		 */
+		return 0;
+	}
+
+	/* Create stage-2 page mappings - Level 2 */
 	if (pmd_none(*pmd)) {
 		if (!cache)
 			return 0; /* ignore calls from kvm_set_spte_hva */
@@ -508,15 +577,18 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  gfn_t gfn, struct kvm_memory_slot *memslot,
+			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
 {
-	pte_t new_pte;
-	pfn_t pfn;
 	int ret;
-	bool write_fault, writable;
+	bool write_fault, writable, hugetlb = false;
 	unsigned long mmu_seq;
+	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
+	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+	struct vm_area_struct *vma;
+	pfn_t pfn;
 
 	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -524,6 +596,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
+	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma_intersection(current->mm, hva, hva + 1);
+	if (is_vm_hugetlb_page(vma)) {
+		hugetlb = true;
+		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+	}
+	up_read(&current->mm->mmap_sem);
+
 	/* We need minimum second+third level pages */
 	ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
 	if (ret)
@@ -541,26 +622,38 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 */
 	smp_rmb();
 
-	pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
-	new_pte = pfn_pte(pfn, PAGE_S2);
-	coherent_icache_guest_page(vcpu->kvm, gfn);
-
-	spin_lock(&vcpu->kvm->mmu_lock);
-	if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+	spin_lock(&kvm->mmu_lock);
+	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
-	if (writable) {
-		kvm_set_s2pte_writable(&new_pte);
-		kvm_set_pfn_dirty(pfn);
+
+	if (hugetlb) {
+		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+		new_pmd = pmd_mkhuge(new_pmd);
+		if (writable) {
+			kvm_set_s2pmd_writable(&new_pmd);
+			kvm_set_pfn_dirty(pfn);
+		}
+		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+	} else {
+		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+		if (writable) {
+			kvm_set_s2pte_writable(&new_pte);
+			kvm_set_pfn_dirty(pfn);
+		}
+		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
 	}
-	stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
 
 out_unlock:
-	spin_unlock(&vcpu->kvm->mmu_lock);
+	spin_unlock(&kvm->mmu_lock);
 	kvm_release_pfn_clean(pfn);
-	return 0;
+	return ret;
 }
 
 /**
@@ -629,7 +722,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 
-	ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index efe609c6a3c9..680f74e67497 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -91,6 +91,7 @@ int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
 #define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
+#define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
 
 static inline bool kvm_is_write_fault(unsigned long esr)
 {
@@ -116,13 +117,18 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
 	pte_val(*pte) |= PTE_S2_RDWR;
 }
 
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+	pmd_val(*pmd) |= PMD_S2_RDWR;
+}
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
+					      unsigned long size)
 {
 	if (!icache_is_aliasing()) {		/* PIPT */
-		unsigned long hva = gfn_to_hva(kvm, gfn);
-		flush_icache_range(hva, hva + PAGE_SIZE);
+		flush_icache_range(hva, hva + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
 		/* any kind of VIPT cache */
 		__flush_icache_all();
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 2e9d83673ef6..b1d2e26c3c88 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -85,6 +85,8 @@
 #define PTE_S2_RDONLY		(_AT(pteval_t, 1) << 6)   /* HAP[2:1] */
 #define PTE_S2_RDWR		(_AT(pteval_t, 3) << 6)   /* HAP[2:1] */
 
+#define PMD_S2_RDWR		(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
+
 /*
  * Memory Attribute override for Stage-2 (MemAttr[3:0])
  */

From ad8b3ca795b8534c292e8916ffdfc36b4bd09940 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 2 Oct 2013 15:32:01 -0700
Subject: [PATCH 089/287] KVM: ARM: Transparent huge page (THP) support

Support transparent huge pages in KVM/ARM and KVM/ARM64.  The
transparent_hugepage_adjust is not very pretty, but this is also how
it's solved on x86 and seems to be simply an artifact on how THPs
behave.  This should eventually be shared across architectures if
possible, but that can always be changed down the road.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9b5fdb9781f74fb15827e465bfb5aa63211953c8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 58 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 745d8b1630cc..371958370de4 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define kvm_pmd_huge(_x)	(pmd_huge(_x))
+#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
@@ -576,12 +576,53 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	return ret;
 }
 
+static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
+{
+	pfn_t pfn = *pfnp;
+	gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+	if (PageTransCompound(pfn_to_page(pfn))) {
+		unsigned long mask;
+		/*
+		 * The address we faulted on is backed by a transparent huge
+		 * page.  However, because we map the compound huge page and
+		 * not the individual tail page, we need to transfer the
+		 * refcount to the head page.  We have to be careful that the
+		 * THP doesn't start to split while we are adjusting the
+		 * refcounts.
+		 *
+		 * We are sure this doesn't happen, because mmu_notifier_retry
+		 * was successful and we are holding the mmu_lock, so if this
+		 * THP is trying to split, it will be blocked in the mmu
+		 * notifier before touching any of the pages, specifically
+		 * before being able to call __split_huge_page_refcount().
+		 *
+		 * We can therefore safely transfer the refcount from PG_tail
+		 * to PG_head and switch the pfn from a tail page to the head
+		 * page accordingly.
+		 */
+		mask = PTRS_PER_PMD - 1;
+		VM_BUG_ON((gfn & mask) != (pfn & mask));
+		if (pfn & mask) {
+			*ipap &= PMD_MASK;
+			kvm_release_pfn_clean(pfn);
+			pfn &= ~mask;
+			kvm_get_pfn(pfn);
+			*pfnp = pfn;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot,
 			  unsigned long fault_status)
 {
 	int ret;
-	bool write_fault, writable, hugetlb = false;
+	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
 	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
 	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
@@ -602,6 +643,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_vm_hugetlb_page(vma)) {
 		hugetlb = true;
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+	} else {
+		/*
+		 * Pages belonging to VMAs not aligned to the PMD mapping
+		 * granularity cannot be mapped using block descriptors even
+		 * if the pages belong to a THP for the process, because the
+		 * stage-2 block descriptor will cover more than a single THP
+		 * and we loose atomicity for unmapping, updates, and splits
+		 * of the THP or other pages in the stage-2 block range.
+		 */
+		if (vma->vm_start & ~PMD_MASK)
+			force_pte = true;
 	}
 	up_read(&current->mm->mmap_sem);
 
@@ -629,6 +681,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
+	if (!hugetlb && !force_pte)
+		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);

From c0acda6fd7c83ca3cc8c5892247ece8c9651c9f3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:04 +0100
Subject: [PATCH 090/287] ARM: KVM: Fix MPIDR computing to support virtual
 clusters

In order to be able to support more than 4 A7 or A15 CPUs,
we need to fix the MPIDR computing to reflect the fact that
both A15 and A7 can only exist in clusters of at most 4 CPUs.

Fix the MPIDR computing to allow virtual clusters to be exposed
to the guest.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2d1d841bd44e24b58a3d3cc4fa793670aaa38fbf)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index a629f2c1d0f9..631e6bd0e05f 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -74,11 +74,13 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 {
 	/*
-	 * Compute guest MPIDR. No need to mess around with different clusters
-	 * but we read the 'U' bit from the underlying hardware directly.
+	 * Compute guest MPIDR. We build a virtual cluster out of the
+	 * vcpu_id, but we read the 'U' bit from the underlying
+	 * hardware directly.
 	 */
-	vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & MPIDR_SMP_BITMASK)
-					| vcpu->vcpu_id;
+	vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+				     ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
+				     (vcpu->vcpu_id & 3));
 }
 
 /* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */

From 8c02aa50010946b98e5077e5e05b5d99e6286ab1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:05 +0100
Subject: [PATCH 091/287] ARM: KVM: fix L2CTLR to be per-cluster

The L2CTLR register contains the number of CPUs in this cluster.

Make sure the register content is actually relevant to the vcpu
that is being configured by computing the number of cores that are
part of its cluster.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9cbb6d969cb6561de45d917b8bb9281cb374bb35)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 631e6bd0e05f..78c0885d6501 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -124,6 +124,10 @@ static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
 	asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
 	l2ctlr &= ~(3 << 24);
 	ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+	/* How many cores in the current cluster and the next ones */
+	ncores -= (vcpu->vcpu_id & ~3);
+	/* Cap it to the maximum number of cores in a single cluster */
+	ncores = min(ncores, 3U);
 	l2ctlr |= (ncores & 3) << 24;
 
 	vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;

From c93a79267ff11590dd7835c6622bd9f29ef43e73 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:06 +0100
Subject: [PATCH 092/287] ARM: KVM: drop limitation to 4 CPU VMs

Now that the KVM/arm code knows about affinity, remove the hard
limit of 4 vcpus per VM.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7999b4d18211bcfb40e3574cf75e94518e9fa2c6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/reset.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index d153e64d1255..f558c073c023 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -33,8 +33,6 @@
  * Cortex-A15 and Cortex-A7 Reset Values
  */
 
-static const int cortexa_max_cpu_idx = 3;
-
 static struct kvm_regs cortexa_regs_reset = {
 	.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
 };
@@ -64,8 +62,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 	switch (vcpu->arch.target) {
 	case KVM_ARM_TARGET_CORTEX_A7:
 	case KVM_ARM_TARGET_CORTEX_A15:
-		if (vcpu->vcpu_id > cortexa_max_cpu_idx)
-			return -EINVAL;
 		reset_regs = &cortexa_regs_reset;
 		vcpu->arch.midr = read_cpuid_id();
 		cpu_vtimer_irq = &cortexa_vtimer_irq;

From 3d6b7ab3028ceacadd0a29b4757a788e24987d10 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 18 Oct 2013 18:19:03 +0100
Subject: [PATCH 093/287] arm/arm64: KVM: PSCI: use MPIDR to identify a target
 CPU

The KVM PSCI code blindly assumes that vcpu_id and MPIDR are
the same thing. This is true when vcpus are organized as a flat
topology, but is wrong when trying to emulate any other topology
(such as A15 clusters).

Change the KVM PSCI CPU_ON code to look at the MPIDR instead
of the vcpu_id to pick a target CPU.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 79c648806f9034abf54332b78043bb242189d953)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   |  5 +++++
 arch/arm/kvm/psci.c                  | 17 +++++++++++++----
 arch/arm64/include/asm/kvm_emulate.h |  5 +++++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index a464e8d7b6c5..708e4d8a647f 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -157,4 +157,9 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
 }
 
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cp15[c0_MPIDR];
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 86a693a02ba3..311263124acf 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -18,6 +18,7 @@
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
+#include <asm/cputype.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_psci.h>
 
@@ -34,22 +35,30 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 {
 	struct kvm *kvm = source_vcpu->kvm;
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu = NULL, *tmp;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
+	unsigned long mpidr;
 	phys_addr_t target_pc;
+	int i;
 
 	cpu_id = *vcpu_reg(source_vcpu, 1);
 	if (vcpu_mode_is_32bit(source_vcpu))
 		cpu_id &= ~((u32) 0);
 
-	if (cpu_id >= atomic_read(&kvm->online_vcpus))
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+			vcpu = tmp;
+			break;
+		}
+	}
+
+	if (!vcpu)
 		return KVM_PSCI_RET_INVAL;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
-	vcpu = kvm_get_vcpu(kvm, cpu_id);
-
 	wq = kvm_arch_vcpu_wq(vcpu);
 	if (!waitqueue_active(wq))
 		return KVM_PSCI_RET_INVAL;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index eec073875218..6df93cdc652b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -177,4 +177,9 @@ static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
 	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
 }
 
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+	return vcpu_sys_reg(vcpu, MPIDR_EL1);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */

From b695de83412eb8ff7c77b215fa24bca5b1871618 Mon Sep 17 00:00:00 2001
From: Yang Zhang <yang.z.zhang@Intel.com>
Date: Thu, 24 Oct 2013 09:56:39 +0800
Subject: [PATCH 094/287] KVM: Mapping IOMMU pages after updating memslot

In kvm_iommu_map_pages(), we need to know the page size via call
kvm_host_page_size(). And it will check whether the target slot
is valid before return the right page size.
Currently, we will map the iommu pages when creating a new slot.
But we call kvm_iommu_map_pages() during preparing the new slot.
At that time, the new slot is not visible by domain(still in preparing).
So we cannot get the right page size from kvm_host_page_size() and
this will break the IOMMU super page logic.
The solution is to map the iommu pages after we insert the new slot
into domain.

Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Tested-by: Patrick Lu <patrick.lu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e0230e1327fb862c9b6cde24ae62d55f9db62c9b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c777a6e582f0..340c97cf55b6 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -873,21 +873,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 			goto out_free;
 	}
 
-	/*
-	 * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
-	 * un-mapped and re-mapped if their base changes.  Since base change
-	 * unmapping is handled above with slot deletion, mapping alone is
-	 * needed here.  Anything else the iommu might care about for existing
-	 * slots (size changes, userspace addr changes and read-only flag
-	 * changes) is disallowed above, so any other attribute changes getting
-	 * here can be skipped.
-	 */
-	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-		r = kvm_iommu_map_pages(kvm, &new);
-		if (r)
-			goto out_slots;
-	}
-
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (change == KVM_MR_DELETE) {
 		new.dirty_bitmap = NULL;
@@ -901,6 +886,20 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	kvm_free_physmem_slot(kvm, &old, &new);
 	kfree(old_memslots);
 
+	/*
+	 * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
+	 * un-mapped and re-mapped if their base changes.  Since base change
+	 * unmapping is handled above with slot deletion, mapping alone is
+	 * needed here.  Anything else the iommu might care about for existing
+	 * slots (size changes, userspace addr changes and read-only flag
+	 * changes) is disallowed above, so any other attribute changes getting
+	 * here can be skipped.
+	 */
+	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
+		r = kvm_iommu_map_pages(kvm, &new);
+		return r;
+	}
+
 	return 0;
 
 out_slots:

From c0cdef185a6b5c8f3c5e6b923e95774c36764011 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 2 Aug 2013 11:41:13 +0100
Subject: [PATCH 095/287] arm64: KVM: Yield CPU when vcpu executes a WFE

On an (even slightly) oversubscribed system, spinlocks are quickly
becoming a bottleneck, as some vcpus are spinning, waiting for a
lock to be released, while the vcpu holding the lock may not be
running at all.

The solution is to trap blocking WFEs and tell KVM that we're
now spinning. This ensures that other vpus will get a scheduling
boost, allowing the lock to be released more quickly. Also, using
CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT slightly improves the performance
when the VM is severely overcommited.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit d241aac798eb042e605f78c31a4122e583b2cd13)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h |  8 ++++++--
 arch/arm64/kvm/Kconfig           |  1 +
 arch/arm64/kvm/handle_exit.c     | 18 +++++++++++++-----
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index a5f28e2720c7..c98ef4771c73 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -63,6 +63,7 @@
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
  * TSW:		Trap cache operations by set/way
+ * TWE:		Trap WFE
  * TWI:		Trap WFI
  * TIDCP:	Trap L2CTLR/L2ECTLR
  * BSU_IS:	Upgrade barriers to the inner shareable domain
@@ -72,8 +73,9 @@
  * FMO:		Override CPSR.F and enable signaling with VF
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
-#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
-			 HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
+			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_AMO | HCR_IMO | HCR_FMO | \
 			 HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
@@ -242,4 +244,6 @@
 
 #define ESR_EL2_EC_xABT_xFSR_EXTABT	0x10
 
+#define ESR_EL2_EC_WFI_ISS_WFE	(1 << 0)
+
 #endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 21e90820bd23..4480ab339a00 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
 	select KVM_ARM_VGIC
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 9beaca033437..8da56067c304 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -47,21 +47,29 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 }
 
 /**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
+ *		    instruction executed by a guest
+ *
  * @vcpu:	the vcpu pointer
  *
- * Simply call kvm_vcpu_block(), which will halt execution of
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
  * world-switches and schedule other host processes until there is an
  * incoming IRQ or FIQ to the VM.
  */
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	kvm_vcpu_block(vcpu);
+	if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE)
+		kvm_vcpu_on_spin(vcpu);
+	else
+		kvm_vcpu_block(vcpu);
+
 	return 1;
 }
 
 static exit_handle_fn arm_exit_handlers[] = {
-	[ESR_EL2_EC_WFI]	= kvm_handle_wfi,
+	[ESR_EL2_EC_WFI]	= kvm_handle_wfx,
 	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
 	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,

From e845f9d367f2ac8197a142fd29d1baa8fcc4dd75 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 30 Oct 2013 12:12:13 +0100
Subject: [PATCH 096/287] KVM: use a more sensible error number when debugfs
 directory creation fails

I don't know if this was due to cut and paste, or somebody was really
using a D20 to pick the error code for kvm_init_debugfs as suggested by
Linus (EFAULT is 14, so the possibility cannot be entirely ruled out).

In any case, this patch fixes it.

Reported-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0c8eb04a6241da28deb108181213b791c378123b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 340c97cf55b6..d6b7d797cb16 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3010,7 +3010,7 @@ static const struct file_operations *stat_fops[] = {
 
 static int kvm_init_debug(void)
 {
-	int r = -EFAULT;
+	int r = -EEXIST;
 	struct kvm_stats_debugfs_item *p;
 
 	kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);

From b4fe3057a0c543ed42aa2a69a6ab9a5312571c34 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 22 Sep 2013 16:44:50 +0200
Subject: [PATCH 097/287] kvm: Add KVM_GET_EMULATED_CPUID

Add a kvm ioctl which states which system functionality kvm emulates.
The format used is that of CPUID and we return the corresponding CPUID
bits set for which we do emulate functionality.

Make sure ->padding is being passed on clean from userspace so that we
can use it for something in the future, after the ioctl gets cast in
stone.

s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at
it.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 77 +++++++++++++++++++++++++++++--
 arch/x86/include/uapi/asm/kvm.h   |  6 +--
 arch/x86/kvm/cpuid.c              | 57 ++++++++++++++++++++---
 arch/x86/kvm/cpuid.h              |  5 +-
 arch/x86/kvm/x86.c                |  9 ++--
 include/uapi/linux/kvm.h          |  2 +
 6 files changed, 139 insertions(+), 17 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9bfadeb8be31..d196ebe8956e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1122,9 +1122,9 @@ struct kvm_cpuid2 {
 	struct kvm_cpuid_entry2 entries[0];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
 
 struct kvm_cpuid_entry2 {
 	__u32 function;
@@ -2661,6 +2661,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
 };
 
 
+4.81 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 flags;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+  function: the eax value used to obtain the entry
+  index: the ecx value used to obtain the entry (for entries that are
+         affected by ecx)
+  flags: an OR of zero or more of the following:
+        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+           if the index field is valid
+        KVM_CPUID_FLAG_STATEFUL_FUNC:
+           if cpuid for this function returns different values for successive
+           invocations; there will be several entries with the same function,
+           all with this flag set
+        KVM_CPUID_FLAG_STATE_READ_NEXT:
+           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+           the first entry to be read by a cpu
+   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+         this function/index combination
+
+
 6. Capabilities that can be enabled
 -----------------------------------
 
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
 	__u32 padding[3];
 };
 
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC    2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT  4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
 
 /* for KVM_SET_CPUID2 */
 struct kvm_cpuid2 {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a20ecb5b6cbf..89d288237b9c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -187,8 +187,14 @@ static bool supported_xcr0_bit(unsigned bit)
 
 #define F(x) bit(X86_FEATURE_##x)
 
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
-			 u32 index, int *nent, int maxnent)
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+				   u32 func, u32 index, int *nent, int maxnent)
+{
+	return 0;
+}
+
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+				 u32 index, int *nent, int maxnent)
 {
 	int r;
 	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -480,6 +486,15 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	return r;
 }
 
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+			u32 idx, int *nent, int maxnent, unsigned int type)
+{
+	if (type == KVM_GET_EMULATED_CPUID)
+		return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+
+	return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+}
+
 #undef F
 
 struct kvm_cpuid_param {
@@ -494,8 +509,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
 
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries)
+static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+				 __u32 num_entries, unsigned int ioctl_type)
+{
+	int i;
+
+	if (ioctl_type != KVM_GET_EMULATED_CPUID)
+		return false;
+
+	/*
+	 * We want to make sure that ->padding is being passed clean from
+	 * userspace in case we want to use it for something in the future.
+	 *
+	 * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+	 * have to give ourselves satisfied only with the emulated side. /me
+	 * sheds a tear.
+	 */
+	for (i = 0; i < num_entries; i++) {
+		if (entries[i].padding[0] ||
+		    entries[i].padding[1] ||
+		    entries[i].padding[2])
+			return true;
+	}
+	return false;
+}
+
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG, i;
@@ -512,6 +553,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		goto out;
 	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 		cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+
+	if (sanity_check_entries(entries, cpuid->nent, type))
+		return -EINVAL;
+
 	r = -ENOMEM;
 	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
 	if (!cpuid_entries)
@@ -525,7 +570,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 			continue;
 
 		r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
-				&nent, cpuid->nent);
+				&nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
@@ -536,7 +581,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 		limit = cpuid_entries[nent - 1].eax;
 		for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
 			r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
-				     &nent, cpuid->nent);
+				     &nent, cpuid->nent, type);
 
 		if (r)
 			goto out_free;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
 void kvm_update_cpuid(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
-				      struct kvm_cpuid_entry2 __user *entries);
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+			    struct kvm_cpuid_entry2 __user *entries,
+			    unsigned int type);
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
 			     struct kvm_cpuid_entry __user *entries);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 68b139fe0dbd..5276618579d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2510,6 +2510,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_EXT_EMUL_CPUID:
 	case KVM_CAP_CLOCKSOURCE:
 	case KVM_CAP_PIT:
 	case KVM_CAP_NOP_IO_DELAY:
@@ -2619,15 +2620,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-	case KVM_GET_SUPPORTED_CPUID: {
+	case KVM_GET_SUPPORTED_CPUID:
+	case KVM_GET_EMULATED_CPUID: {
 		struct kvm_cpuid2 __user *cpuid_arg = argp;
 		struct kvm_cpuid2 cpuid;
 
 		r = -EFAULT;
 		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
 			goto out;
-		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
-						      cpuid_arg->entries);
+
+		r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+					    ioctl);
 		if (r)
 			goto out;
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cba62019348d..3d365d191145 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_TRACE_ENABLE          __KVM_DEPRECATED_MAIN_W_0x06
 #define KVM_TRACE_PAUSE           __KVM_DEPRECATED_MAIN_0x07
 #define KVM_TRACE_DISABLE         __KVM_DEPRECATED_MAIN_0x08
+#define KVM_GET_EMULATED_CPUID	  _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
 
 /*
  * Extension capability list.
@@ -667,6 +668,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_RTAS 91
 #define KVM_CAP_IRQ_XICS 92
 #define KVM_CAP_ARM_EL1_32BIT 93
+#define KVM_CAP_EXT_EMUL_CPUID 95
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From a7dc5f55357702cc04773cd9e92a853767209d7d Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 30 Oct 2013 11:02:17 -0600
Subject: [PATCH 098/287] kvm: Add VFIO device

So far we've succeeded at making KVM and VFIO mostly unaware of each
other, but areas are cropping up where a connection beyond eventfds
and irqfds needs to be made.  This patch introduces a KVM-VFIO device
that is meant to be a gateway for such interaction.  The user creates
the device and can add and remove VFIO groups to it via file
descriptors.  When a group is added, KVM verifies the group is valid
and gets a reference to it via the VFIO external user interface.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ec53500fae421e07c5d035918ca454a429732ef4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/devices/vfio.txt |  22 +++
 arch/x86/kvm/Kconfig                       |   1 +
 arch/x86/kvm/Makefile                      |   2 +-
 include/linux/kvm_host.h                   |   1 +
 include/uapi/linux/kvm.h                   |   4 +
 virt/kvm/Kconfig                           |   3 +
 virt/kvm/kvm_main.c                        |   5 +
 virt/kvm/vfio.c                            | 220 +++++++++++++++++++++
 8 files changed, 257 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/virtual/kvm/devices/vfio.txt
 create mode 100644 virt/kvm/vfio.c

diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..ef51740c67ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+  KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM.  The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM.  As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence.  When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+  KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+  KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+  KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..b89c5db2b832 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -38,6 +38,7 @@ config KVM
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
+	select KVM_VFIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index bf4fb04d0112..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -9,7 +9,7 @@ KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
 				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
-				$(KVM)/eventfd.o $(KVM)/irqchip.o
+				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4de0a8fedf3f..214fc2d84366 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
+extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3d365d191145..d5b8501cebfd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -844,6 +844,10 @@ struct kvm_device_attr {
 #define KVM_DEV_TYPE_FSL_MPIC_20	1
 #define KVM_DEV_TYPE_FSL_MPIC_42	2
 #define KVM_DEV_TYPE_XICS		3
+#define KVM_DEV_TYPE_VFIO		4
+#define  KVM_DEV_VFIO_GROUP			1
+#define   KVM_DEV_VFIO_GROUP_ADD			1
+#define   KVM_DEV_VFIO_GROUP_DEL			2
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f59e25..fbe1a48bd629 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -27,3 +27,6 @@ config HAVE_KVM_MSI
 
 config HAVE_KVM_CPU_RELAX_INTERCEPT
        bool
+
+config KVM_VFIO
+       bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d6b7d797cb16..652d682b1d55 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_XICS:
 		ops = &kvm_xics_ops;
 		break;
+#endif
+#ifdef CONFIG_KVM_VFIO
+	case KVM_DEV_TYPE_VFIO:
+		ops = &kvm_vfio_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644
index 000000000000..597c258245ea
--- /dev/null
+++ b/virt/kvm/vfio.c
@@ -0,0 +1,220 @@
+/*
+ * VFIO-KVM bridge pseudo device
+ *
+ * Copyright (C) 2013 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+struct kvm_vfio_group {
+	struct list_head node;
+	struct vfio_group *vfio_group;
+};
+
+struct kvm_vfio {
+	struct list_head group_list;
+	struct mutex lock;
+};
+
+static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
+{
+	struct vfio_group *vfio_group;
+	struct vfio_group *(*fn)(struct file *);
+
+	fn = symbol_get(vfio_group_get_external_user);
+	if (!fn)
+		return ERR_PTR(-EINVAL);
+
+	vfio_group = fn(filep);
+
+	symbol_put(vfio_group_get_external_user);
+
+	return vfio_group;
+}
+
+static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
+{
+	void (*fn)(struct vfio_group *);
+
+	fn = symbol_get(vfio_group_put_external_user);
+	if (!fn)
+		return;
+
+	fn(vfio_group);
+
+	symbol_put(vfio_group_put_external_user);
+}
+
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct vfio_group *vfio_group;
+	struct kvm_vfio_group *kvg;
+	void __user *argp = (void __user *)arg;
+	struct fd f;
+	int32_t fd;
+	int ret;
+
+	switch (attr) {
+	case KVM_DEV_VFIO_GROUP_ADD:
+		if (get_user(fd, (int32_t __user *)argp))
+			return -EFAULT;
+
+		f = fdget(fd);
+		if (!f.file)
+			return -EBADF;
+
+		vfio_group = kvm_vfio_group_get_external_user(f.file);
+		fdput(f);
+
+		if (IS_ERR(vfio_group))
+			return PTR_ERR(vfio_group);
+
+		mutex_lock(&kv->lock);
+
+		list_for_each_entry(kvg, &kv->group_list, node) {
+			if (kvg->vfio_group == vfio_group) {
+				mutex_unlock(&kv->lock);
+				kvm_vfio_group_put_external_user(vfio_group);
+				return -EEXIST;
+			}
+		}
+
+		kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+		if (!kvg) {
+			mutex_unlock(&kv->lock);
+			kvm_vfio_group_put_external_user(vfio_group);
+			return -ENOMEM;
+		}
+
+		list_add_tail(&kvg->node, &kv->group_list);
+		kvg->vfio_group = vfio_group;
+
+		mutex_unlock(&kv->lock);
+
+		return 0;
+
+	case KVM_DEV_VFIO_GROUP_DEL:
+		if (get_user(fd, (int32_t __user *)argp))
+			return -EFAULT;
+
+		f = fdget(fd);
+		if (!f.file)
+			return -EBADF;
+
+		vfio_group = kvm_vfio_group_get_external_user(f.file);
+		fdput(f);
+
+		if (IS_ERR(vfio_group))
+			return PTR_ERR(vfio_group);
+
+		ret = -ENOENT;
+
+		mutex_lock(&kv->lock);
+
+		list_for_each_entry(kvg, &kv->group_list, node) {
+			if (kvg->vfio_group != vfio_group)
+				continue;
+
+			list_del(&kvg->node);
+			kvm_vfio_group_put_external_user(kvg->vfio_group);
+			kfree(kvg);
+			ret = 0;
+			break;
+		}
+
+		mutex_unlock(&kv->lock);
+
+		kvm_vfio_group_put_external_user(vfio_group);
+
+		return ret;
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_vfio_set_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_VFIO_GROUP:
+		return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_vfio_has_attr(struct kvm_device *dev,
+			     struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_VFIO_GROUP:
+		switch (attr->attr) {
+		case KVM_DEV_VFIO_GROUP_ADD:
+		case KVM_DEV_VFIO_GROUP_DEL:
+			return 0;
+		}
+
+		break;
+	}
+
+	return -ENXIO;
+}
+
+static void kvm_vfio_destroy(struct kvm_device *dev)
+{
+	struct kvm_vfio *kv = dev->private;
+	struct kvm_vfio_group *kvg, *tmp;
+
+	list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+		kvm_vfio_group_put_external_user(kvg->vfio_group);
+		list_del(&kvg->node);
+		kfree(kvg);
+	}
+
+	kfree(kv);
+	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
+}
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type)
+{
+	struct kvm_device *tmp;
+	struct kvm_vfio *kv;
+
+	/* Only one VFIO "device" per VM */
+	list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
+		if (tmp->ops == &kvm_vfio_ops)
+			return -EBUSY;
+
+	kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+	if (!kv)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&kv->group_list);
+	mutex_init(&kv->lock);
+
+	dev->private = kv;
+
+	return 0;
+}
+
+struct kvm_device_ops kvm_vfio_ops = {
+	.name = "kvm-vfio",
+	.create = kvm_vfio_create,
+	.destroy = kvm_vfio_destroy,
+	.set_attr = kvm_vfio_set_attr,
+	.has_attr = kvm_vfio_has_attr,
+};

From 7f17a13bdd7902c3e4d55c273820e3099c1e1d33 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 30 Oct 2013 21:43:01 +0200
Subject: [PATCH 099/287] kvm_host: typo fix

fix up typo in comment.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 81e87e26796782e014fd1f2bb9cd8fb6ce4021a8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 214fc2d84366..6d24a2671d29 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -775,7 +775,7 @@ static inline void kvm_guest_enter(void)
 
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
-	 * is very similar to exiting to userspase from rcu point of view. In
+	 * is very similar to exiting to userspace from rcu point of view. In
 	 * addition CPU may stay in a guest mode for quite a long time (up to
 	 * one time slice). Lets treat guest mode as quiescent state, just like
 	 * we do with user-mode execution.

From d49c7a4473eebfda645f469c7a2e77846c8008e2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Tue, 5 Nov 2013 16:04:18 +0200
Subject: [PATCH 100/287] KVM: remove vm mmap method

It was used in conjunction with KVM_SET_MEMORY_REGION ioctl which was
removed by b74a07beed0 in 2010, QEMU stopped using it in 2008, so
it is time to remove the code finally.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 80f5b5e700fa9c58480eafce0d47367bafb70006)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 652d682b1d55..dd9ce144cf99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2524,44 +2524,12 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 }
 #endif
 
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct page *page[1];
-	unsigned long addr;
-	int npages;
-	gfn_t gfn = vmf->pgoff;
-	struct kvm *kvm = vma->vm_file->private_data;
-
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr))
-		return VM_FAULT_SIGBUS;
-
-	npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
-				NULL);
-	if (unlikely(npages != 1))
-		return VM_FAULT_SIGBUS;
-
-	vmf->page = page[0];
-	return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
-	.fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	vma->vm_ops = &kvm_vm_vm_ops;
-	return 0;
-}
-
 static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = kvm_vm_compat_ioctl,
 #endif
-	.mmap           = kvm_vm_mmap,
 	.llseek		= noop_llseek,
 };
 

From 60979ebbbbe9d4c56e87cffcaa92933d96637d7b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Tue, 5 Nov 2013 18:29:45 +0000
Subject: [PATCH 101/287] arm64: KVM: initialize HYP mode following the kernel
 endianness

Force SCTLR_EL2.EE to 1 if the kernel is compiled as BE.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 18ea3dbc9e5c8a53a361b17c4a5676ea6f4bcb72)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp-init.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index ba84e6705e20..2b0244d65c16 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -74,7 +74,10 @@ __do_hyp_init:
 	msr	mair_el2, x4
 	isb
 
-	mov	x4, #SCTLR_EL2_FLAGS
+	mrs	x4, sctlr_el2
+	and	x4, x4, #SCTLR_EL2_EE	// preserve endianness of EL2
+	ldr	x5, =SCTLR_EL2_FLAGS
+	orr	x4, x4, x5
 	msr	sctlr_el2, x4
 	isb
 

From 128a021aa83b73d07153f8fee5dd22e6933f62e2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <Marc.Zyngier@arm.com>
Date: Tue, 5 Nov 2013 18:29:46 +0000
Subject: [PATCH 102/287] arm64: KVM: vgic: byteswap GICv2 access on world
 switch if BE

Ensure that accesses to the GICH_* registers are byteswapped
when the kernel is compiled as big-endian.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c5b2c0f5203b3bc678a8967daedf7114029975ae)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 1ac0bbbdddb2..3b47c36e10ff 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -403,6 +403,14 @@ __kvm_hyp_code_start:
 	ldr	w9, [x2, #GICH_ELRSR0]
 	ldr	w10, [x2, #GICH_ELRSR1]
 	ldr	w11, [x2, #GICH_APR]
+CPU_BE(	rev	w4,  w4  )
+CPU_BE(	rev	w5,  w5  )
+CPU_BE(	rev	w6,  w6  )
+CPU_BE(	rev	w7,  w7  )
+CPU_BE(	rev	w8,  w8  )
+CPU_BE(	rev	w9,  w9  )
+CPU_BE(	rev	w10, w10 )
+CPU_BE(	rev	w11, w11 )
 
 	str	w4, [x3, #VGIC_CPU_HCR]
 	str	w5, [x3, #VGIC_CPU_VMCR]
@@ -421,6 +429,7 @@ __kvm_hyp_code_start:
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
 	add	x3, x3, #VGIC_CPU_LR
 1:	ldr	w5, [x2], #4
+CPU_BE(	rev	w5, w5 )
 	str	w5, [x3], #4
 	sub	w4, w4, #1
 	cbnz	w4, 1b
@@ -446,6 +455,9 @@ __kvm_hyp_code_start:
 	ldr	w4, [x3, #VGIC_CPU_HCR]
 	ldr	w5, [x3, #VGIC_CPU_VMCR]
 	ldr	w6, [x3, #VGIC_CPU_APR]
+CPU_BE(	rev	w4, w4 )
+CPU_BE(	rev	w5, w5 )
+CPU_BE(	rev	w6, w6 )
 
 	str	w4, [x2, #GICH_HCR]
 	str	w5, [x2, #GICH_VMCR]
@@ -456,6 +468,7 @@ __kvm_hyp_code_start:
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
 	add	x3, x3, #VGIC_CPU_LR
 1:	ldr	w5, [x3], #4
+CPU_BE(	rev	w5, w5 )
 	str	w5, [x2], #4
 	sub	w4, w4, #1
 	cbnz	w4, 1b

From dd8858820ec48b90c5ab5c6631f50a613deb55bc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 12 Feb 2013 12:40:22 +0000
Subject: [PATCH 103/287] arm/arm64: KVM: MMIO support for BE guest

Do the necessary byteswap when host and guest have different
views of the universe. Actually, the only case we need to take
care of is when the guest is BE. All the other cases are naturally
handled.

Also be careful about endianness when the data is being memcopy-ed
from/to the run buffer.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6d89d2d9b5bac9dbe40ee106ceda9307b6265234)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   | 41 +++++++++++++
 arch/arm/kvm/mmio.c                  | 86 ++++++++++++++++++++++++----
 arch/arm64/include/asm/kvm_emulate.h | 48 ++++++++++++++++
 3 files changed, 164 insertions(+), 11 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 708e4d8a647f..b79cd8d3d6ee 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -162,4 +162,45 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cp15[c0_MPIDR];
 }
 
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+	return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return be16_to_cpu(data & 0xffff);
+		default:
+			return be32_to_cpu(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_be16(data & 0xffff);
+		default:
+			return cpu_to_be32(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
 #endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 0c25d9487d53..4cb5a93182e9 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,6 +23,68 @@
 
 #include "trace.h"
 
+static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
+{
+	void *datap = NULL;
+	union {
+		u8	byte;
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		tmp.byte	= data;
+		datap		= &tmp.byte;
+		break;
+	case 2:
+		tmp.hword	= data;
+		datap		= &tmp.hword;
+		break;
+	case 4:
+		tmp.word	= data;
+		datap		= &tmp.word;
+		break;
+	case 8:
+		tmp.dword	= data;
+		datap		= &tmp.dword;
+		break;
+	}
+
+	memcpy(buf, datap, len);
+}
+
+static unsigned long mmio_read_buf(char *buf, unsigned int len)
+{
+	unsigned long data = 0;
+	union {
+		u16	hword;
+		u32	word;
+		u64	dword;
+	} tmp;
+
+	switch (len) {
+	case 1:
+		data = buf[0];
+		break;
+	case 2:
+		memcpy(&tmp.hword, buf, len);
+		data = tmp.hword;
+		break;
+	case 4:
+		memcpy(&tmp.word, buf, len);
+		data = tmp.word;
+		break;
+	case 8:
+		memcpy(&tmp.dword, buf, len);
+		data = tmp.dword;
+		break;
+	}
+
+	return data;
+}
+
 /**
  * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
  * @vcpu: The VCPU pointer
@@ -33,28 +95,27 @@
  */
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	unsigned long *dest;
+	unsigned long data;
 	unsigned int len;
 	int mask;
 
 	if (!run->mmio.is_write) {
-		dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
-		*dest = 0;
-
 		len = run->mmio.len;
 		if (len > sizeof(unsigned long))
 			return -EINVAL;
 
-		memcpy(dest, run->mmio.data, len);
-
-		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-				*((u64 *)run->mmio.data));
+		data = mmio_read_buf(run->mmio.data, len);
 
 		if (vcpu->arch.mmio_decode.sign_extend &&
 		    len < sizeof(unsigned long)) {
 			mask = 1U << ((len * 8) - 1);
-			*dest = (*dest ^ mask) - mask;
+			data = (data ^ mask) - mask;
 		}
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+			       data);
+		data = vcpu_data_host_to_guest(vcpu, data, len);
+		*vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
 	}
 
 	return 0;
@@ -105,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		 phys_addr_t fault_ipa)
 {
 	struct kvm_exit_mmio mmio;
+	unsigned long data;
 	unsigned long rt;
 	int ret;
 
@@ -125,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
+	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
+
 	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
 					 KVM_TRACE_MMIO_READ_UNSATISFIED,
 			mmio.len, fault_ipa,
-			(mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0);
+			(mmio.is_write) ? data : 0);
 
 	if (mmio.is_write)
-		memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
+		mmio_write_buf(mmio.data, mmio.len, data);
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6df93cdc652b..291f87cf457f 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -182,4 +182,52 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu_sys_reg(vcpu, MPIDR_EL1);
 }
 
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
+
+	return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return be16_to_cpu(data & 0xffff);
+		case 4:
+			return be32_to_cpu(data & 0xffffffff);
+		default:
+			return be64_to_cpu(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+						    unsigned long data,
+						    unsigned int len)
+{
+	if (kvm_vcpu_is_be(vcpu)) {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_be16(data & 0xffff);
+		case 4:
+			return cpu_to_be32(data & 0xffffffff);
+		default:
+			return cpu_to_be64(data);
+		}
+	}
+
+	return data;		/* Leave LE untouched */
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */

From 53e38964402dd81c8528f3d2c6fd119c4091a390 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 5 Nov 2013 14:12:15 +0000
Subject: [PATCH 104/287] arm/arm64: KVM: PSCI: propagate caller endianness to
 the incoming vcpu

When booting a vcpu using PSCI, make sure we start it with the
endianness of the caller. Otherwise, secondaries can be pretty
unhappy to execute a BE kernel in LE mode...

This conforms to PSCI spec Rev B, 5.13.3.

Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ce94fe93d566bf381c6ecbd45010d36c5f04d692)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   | 5 +++++
 arch/arm/kvm/psci.c                  | 4 ++++
 arch/arm64/include/asm/kvm_emulate.h | 8 ++++++++
 3 files changed, 17 insertions(+)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b79cd8d3d6ee..0fa90c962ac8 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -162,6 +162,11 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu->arch.cp15[c0_MPIDR];
 }
 
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+	*vcpu_cpsr(vcpu) |= PSR_E_BIT;
+}
+
 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
 {
 	return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 311263124acf..0881bf169fbc 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -71,6 +71,10 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		vcpu_set_thumb(vcpu);
 	}
 
+	/* Propagate caller endianness */
+	if (kvm_vcpu_is_be(source_vcpu))
+		kvm_vcpu_set_be(vcpu);
+
 	*vcpu_pc(vcpu) = target_pc;
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 291f87cf457f..dd8ecfc3f995 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -182,6 +182,14 @@ static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
 	return vcpu_sys_reg(vcpu, MPIDR_EL1);
 }
 
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+	if (vcpu_mode_is_32bit(vcpu))
+		*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
+	else
+		vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+}
+
 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_mode_is_32bit(vcpu))

From 52031ff6011942b7e29788b9ce5066cb45fe3e4e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 15 Nov 2013 13:14:12 -0800
Subject: [PATCH 105/287] arm/arm64: KVM: Fix hyp mappings of vmalloc regions

Using virt_to_phys on percpu mappings is horribly wrong as it may be
backed by vmalloc.  Introduce kvm_kaddr_to_phys which translates both
types of valid kernel addresses to the corresponding physical address.

At the same time resolves a typing issue where we were storing the
physical address as a 32 bit unsigned long (on arm), truncating the
physical address for addresses above the 4GB limit.  This caused
breakage on Keystone.

Cc: <stable@vger.kernel.org>	[3.10+]
Reported-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 40c2729bab48e2832b17c1fa8af9db60e776131b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 371958370de4..580906989db1 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -334,6 +334,17 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 	return err;
 }
 
+static phys_addr_t kvm_kaddr_to_phys(void *kaddr)
+{
+	if (!is_vmalloc_addr(kaddr)) {
+		BUG_ON(!virt_addr_valid(kaddr));
+		return __pa(kaddr);
+	} else {
+		return page_to_phys(vmalloc_to_page(kaddr)) +
+		       offset_in_page(kaddr);
+	}
+}
+
 /**
  * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
  * @from:	The virtual kernel start address of the range
@@ -345,16 +356,27 @@ static int __create_hyp_mappings(pgd_t *pgdp,
  */
 int create_hyp_mappings(void *from, void *to)
 {
-	unsigned long phys_addr = virt_to_phys(from);
+	phys_addr_t phys_addr;
+	unsigned long virt_addr;
 	unsigned long start = KERN_TO_HYP((unsigned long)from);
 	unsigned long end = KERN_TO_HYP((unsigned long)to);
 
-	/* Check for a valid kernel memory mapping */
-	if (!virt_addr_valid(from) || !virt_addr_valid(to - 1))
-		return -EINVAL;
+	start = start & PAGE_MASK;
+	end = PAGE_ALIGN(end);
 
-	return __create_hyp_mappings(hyp_pgd, start, end,
-				     __phys_to_pfn(phys_addr), PAGE_HYP);
+	for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
+		int err;
+
+		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
+		err = __create_hyp_mappings(hyp_pgd, virt_addr,
+					    virt_addr + PAGE_SIZE,
+					    __phys_to_pfn(phys_addr),
+					    PAGE_HYP);
+		if (err)
+			return err;
+	}
+
+	return 0;
 }
 
 /**

From 4ae68e1ee672530df9236b07ff267f06e107bc9a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 18 Nov 2013 10:35:55 +0100
Subject: [PATCH 106/287] KVM: kvm_clear_guest_page(): fix empty_zero_page
 usage

Using the address of 'empty_zero_page' as source address in order to
clear a page is wrong. On some architectures empty_zero_page is only the
pointer to the struct page of the empty_zero_page.  Therefore the clear
page operation would copy the contents of a couple of struct pages instead
of clearing a page.  For kvm only arm/arm64 are affected by this bug.

To fix this use the ZERO_PAGE macro instead which will return the struct
page address of the empty_zero_page on all architectures.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
(cherry picked from commit 8a3caa6d74597c2a083f7c87f866891a0b12540b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dd9ce144cf99..e2b9a0670639 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1613,8 +1613,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
 
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
 {
-	return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
-				    offset, len);
+	const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+	return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
 

From 4479f3b0060731a9d61b30570db282b5587c885f Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 19 Nov 2013 14:59:12 -0500
Subject: [PATCH 107/287] arm/arm64: kvm: Use virt_to_idmap instead of
 virt_to_phys for idmap mappings

KVM initialisation fails on architectures implementing virt_to_idmap()
because virt_to_phys() on such architectures won't fetch you the correct
idmap page.

So update the KVM ARM code to use the virt_to_idmap() to fix the issue.
Since the KVM code is shared between arm and arm64, we create
kvm_virt_to_phys() and handle the redirection in respective headers.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4fda342cc7f577599c53fd27b99c953c7b1da18a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   | 1 +
 arch/arm/kvm/mmu.c               | 8 ++++----
 arch/arm64/include/asm/kvm_mmu.h | 1 +
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 77de4a41cc50..2d122adcdb22 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 580906989db1..659db0ed1370 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -916,9 +916,9 @@ int kvm_mmu_init(void)
 {
 	int err;
 
-	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
-	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
-	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
 
 	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
 		/*
@@ -945,7 +945,7 @@ int kvm_mmu_init(void)
 		 */
 		kvm_flush_dcache_to_poc(init_bounce_page, len);
 
-		phys_base = virt_to_phys(init_bounce_page);
+		phys_base = kvm_virt_to_phys(init_bounce_page);
 		hyp_idmap_vector += phys_base - hyp_idmap_start;
 		hyp_idmap_start = phys_base;
 		hyp_idmap_end = phys_base + len;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 680f74e67497..7f1f9408ff66 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */

From bc071b4cc769735a12fc9fa17cb54dd42f3fb0e6 Mon Sep 17 00:00:00 2001
From: Andy Honig <ahonig@google.com>
Date: Mon, 18 Nov 2013 16:09:22 -0800
Subject: [PATCH 108/287] KVM: Improve create VCPU parameter (CVE-2013-4587)

In multiple functions the vcpu_id is used as an offset into a bitfield.  Ag
malicious user could specify a vcpu_id greater than 255 in order to set or
clear bits in kernel memory.  This could be used to elevate priveges in the
kernel.  This patch verifies that the vcpu_id provided is less than 255.
The api documentation already specifies that the vcpu_id must be less than
max_vcpus, but this is currently not checked.

Reported-by: Andrew Honig <ahonig@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Honig <ahonig@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 338c7dbadd2671189cec7faf64c84d01071b3f96)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e2b9a0670639..2dd59b957164 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1894,6 +1894,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	int r;
 	struct kvm_vcpu *vcpu, *v;
 
+	if (id >= KVM_MAX_VCPUS)
+		return -EINVAL;
+
 	vcpu = kvm_arch_vcpu_create(kvm, id);
 	if (IS_ERR(vcpu))
 		return PTR_ERR(vcpu);

From 580ab4613364270a998a44850d49c4d8c5df5144 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Date: Fri, 13 Dec 2013 15:07:21 +0900
Subject: [PATCH 109/287] KVM: Use cond_resched() directly and remove useless
 kvm_resched()

Since the commit 15ad7146 ("KVM: Use the scheduler preemption notifiers
to make kvm preemptible"), the remaining stuff in this function is a
simple cond_resched() call with an extra need_resched() check which was
there to avoid dropping VCPUs unnecessarily.  Now it is meaningless.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c08ac06ab3f3cdb8d34376c3a8a5e46a31a62c8f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/ia64/kvm/kvm-ia64.c     | 2 +-
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 arch/x86/kvm/x86.c           | 2 +-
 include/linux/kvm_host.h     | 1 -
 virt/kvm/kvm_main.c          | 8 --------
 5 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 985bf80c622e..53f44bee9ebb 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -702,7 +702,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 out:
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
-		kvm_resched(vcpu);
+		cond_resched();
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f5928b394..717e5b525f3b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1253,7 +1253,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvm_guest_exit();
 
 	preempt_enable();
-	kvm_resched(vcpu);
+	cond_resched();
 
 	spin_lock(&vc->lock);
 	now = get_tb();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5276618579d3..805c8e92cf66 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5947,7 +5947,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		}
 		if (need_resched()) {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-			kvm_resched(vcpu);
+			cond_resched();
 			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		}
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d24a2671d29..9c0f8545df73 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -569,7 +569,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
-void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2dd59b957164..cb9a865c8e01 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1706,14 +1706,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
-	if (!need_resched())
-		return;
-	cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
 bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;

From 8215ed10b4803d159b3f10784140a79eafd94382 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Mon, 5 Aug 2013 15:04:46 +0100
Subject: [PATCH 110/287] arm: kvm: implement CPU PM notifier

Upon CPU shutdown and consequent warm-reboot, the hypervisor CPU state
must be re-initialized. This patch implements a CPU PM notifier that
upon warm-boot calls a KVM hook to reinitialize properly the hypervisor
state so that the CPU can be safely resumed.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
(cherry picked from commit 1fcf7ce0c60213994269fb59569ec161eb6e08d6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e312e4a53f8d..b04013608e7e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/cpu.h>
+#include <linux/cpu_pm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
@@ -853,6 +854,33 @@ static struct notifier_block hyp_init_cpu_nb = {
 	.notifier_call = hyp_init_cpu_notify,
 };
 
+#ifdef CONFIG_CPU_PM
+static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
+				    unsigned long cmd,
+				    void *v)
+{
+	if (cmd == CPU_PM_EXIT) {
+		cpu_init_hyp_mode(NULL);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block hyp_init_cpu_pm_nb = {
+	.notifier_call = hyp_init_cpu_pm_notifier,
+};
+
+static void __init hyp_cpu_pm_init(void)
+{
+	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
+}
+#else
+static inline void hyp_cpu_pm_init(void)
+{
+}
+#endif
+
 /**
  * Inits Hyp-mode on all online CPUs
  */
@@ -1013,6 +1041,8 @@ int kvm_arch_init(void *opaque)
 		goto out_err;
 	}
 
+	hyp_cpu_pm_init();
+
 	kvm_coproc_table_init();
 	return 0;
 out_err:

From 90715c6a824fcc239e7da97c4078b85c7944df7c Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Mon, 9 Dec 2013 00:22:53 +0900
Subject: [PATCH 111/287] treewide: Fix typos in printk

Correct spelling typo in various part of kernel

[ cdall: Pickes KVM/arm64 specific part not already merged into LSK ]

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
(cherry picked from commit 77d84ff87e9d38072abcca665ca22cb1da41cb86)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 8da56067c304..42a0f1bddfe7 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -90,7 +90,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
 
 	if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
 	    !arm_exit_handlers[hsr_ec]) {
-		kvm_err("Unkown exception class: hsr: %#08x\n",
+		kvm_err("Unknown exception class: hsr: %#08x\n",
 			(unsigned int)kvm_vcpu_get_hsr(vcpu));
 		BUG();
 	}

From 7e2c9ce0199412b559bbf527956c004a05a64619 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Nov 2013 17:43:19 -0800
Subject: [PATCH 112/287] arm: KVM: Don't return PSCI_INVAL if waitqueue is
 inactive

The current KVM implementation of PSCI returns INVALID_PARAMETERS if the
waitqueue for the corresponding CPU is not active.  This does not seem
correct, since KVM should not care what the specific thread is doing,
for example, user space may not have called KVM_RUN on this VCPU yet or
the thread may be busy looping to user space because it received a
signal; this is really up to the user space implementation.  Instead we
should check specifically that the CPU is marked as being turned off,
regardless of the VCPU thread state, and if it is, we shall
simply clear the pause flag on the CPU and wake up the thread if it
happens to be blocked for us.

Further, the implementation seems to be racy when executing multiple
VCPU threads.  There really isn't a reasonable user space programming
scheme to ensure all secondary CPUs have reached kvm_vcpu_first_run_init
before turning on the boot CPU.

Therefore, set the pause flag on the vcpu at VCPU init time (which can
reasonably be expected to be completed for all CPUs by user space before
running any VCPUs) and clear both this flag and the feature (in case the
feature can somehow get set again in the future) and ping the waitqueue
on turning on a VCPU using PSCI.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 478a8237f656d86d25b3e4e4bf3c48f590156294)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c  | 30 +++++++++++++++++++-----------
 arch/arm/kvm/psci.c | 11 ++++++-----
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b04013608e7e..bb43b8cc1231 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -479,15 +479,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 			return ret;
 	}
 
-	/*
-	 * Handle the "start in power-off" case by calling into the
-	 * PSCI code.
-	 */
-	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
-		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
-		kvm_psci_call(vcpu);
-	}
-
 	return 0;
 }
 
@@ -701,6 +692,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+					 struct kvm_vcpu_init *init)
+{
+	int ret;
+
+	ret = kvm_vcpu_set_target(vcpu, init);
+	if (ret)
+		return ret;
+
+	/*
+	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 */
+	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+		vcpu->arch.pause = true;
+
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -714,8 +723,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&init, argp, sizeof(init)))
 			return -EFAULT;
 
-		return kvm_vcpu_set_target(vcpu, &init);
-
+		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
 	}
 	case KVM_SET_ONE_REG:
 	case KVM_GET_ONE_REG: {
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 0881bf169fbc..448f60e8d23c 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		}
 	}
 
-	if (!vcpu)
+	/*
+	 * Make sure the caller requested a valid CPU and that the CPU is
+	 * turned off.
+	 */
+	if (!vcpu || !vcpu->arch.pause)
 		return KVM_PSCI_RET_INVAL;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
-	wq = kvm_arch_vcpu_wq(vcpu);
-	if (!waitqueue_active(wq))
-		return KVM_PSCI_RET_INVAL;
-
 	kvm_reset_vcpu(vcpu);
 
 	/* Gracefully handle Thumb2 entry point */
@@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 
+	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
 	return KVM_PSCI_RET_SUCCESS;

From b5a94dd48dc470624dcb7d581432a439d7a289c0 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 16 Nov 2013 10:51:25 -0800
Subject: [PATCH 113/287] arm/arm64: KVM: arch_timer: Initialize cntvoff at
 kvm_init

Initialize the cntvoff at kvm_init_vm time, not before running the VCPUs
at the first time because that will overwrite any potentially restored
values from user space.

Cc: Andre Przywara <andre.przywara@linaro.org>
Acked-by: Marc Zynger <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit a1a64387adeeba7a34ce06f2774e81f496ee803b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c  | 2 ++
 virt/kvm/arm/vgic.c | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bb43b8cc1231..71ad3a8706d9 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -138,6 +138,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	kvm_timer_init(kvm);
+
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.vmid_gen = 0;
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 685fc72fc751..81e9481184a7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1409,7 +1409,6 @@ int kvm_vgic_init(struct kvm *kvm)
 	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
 		vgic_set_target_reg(kvm, 0, i);
 
-	kvm_timer_init(kvm);
 	kvm->arch.vgic.ready = true;
 out:
 	mutex_unlock(&kvm->lock);

From 764ee339777196e8cf9fa090f5041b02efc93630 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@linaro.org>
Date: Fri, 13 Dec 2013 14:23:26 +0100
Subject: [PATCH 114/287] ARM/KVM: save and restore generic timer registers

For migration to work we need to save (and later restore) the state of
each core's virtual generic timer.
Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export
the three needed registers (control, counter, compare value).
Though they live in cp15 space, we don't use the existing list, since
they need special accessor functions and the arch timer is optional.

Acked-by: Marc Zynger <marc.zyngier@arm.com>
Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 39735a3a390431bcf60f9174b7d64f787fd6afa9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |  3 +
 arch/arm/include/uapi/asm/kvm.h   | 20 +++++++
 arch/arm/kvm/guest.c              | 92 ++++++++++++++++++++++++++++++-
 arch/arm64/include/uapi/asm/kvm.h | 18 ++++++
 virt/kvm/arm/arch_timer.c         | 34 ++++++++++++
 5 files changed, 166 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a6f6db14ee4..098f7dd6d564 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c498b60c0505..835b8678de03 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -119,6 +119,26 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
 
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+	(KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+	ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+	ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+	ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+	ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14) 
+#define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14) 
+
 /* Normal registers are mapped as coprocessor 16. */
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 20f8d97904af..2786eae10c0d 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return -EINVAL;
 }
 
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+	return false;
+}
+
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	return 0;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 static unsigned long num_core_regs(void)
 {
 	return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +198,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+		+ NUM_TIMER_REGS;
 }
 
 /**
@@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_coproc_indices(vcpu, uindices);
 }
 
@@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_get_reg(vcpu, reg);
 }
 
@@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_set_reg(vcpu, reg);
 }
 
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5031f4263937..7c25ca8b02b3 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -129,6 +129,24 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
 
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+	KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+	ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+	ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+	ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+	ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+	ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index c2e1ef4604e8..5081e809821f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -182,6 +182,40 @@ static void kvm_timer_init_interrupt(void *info)
 	enable_percpu_irq(host_vtimer_irq, 0);
 }
 
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	switch (regid) {
+	case KVM_REG_ARM_TIMER_CTL:
+		timer->cntv_ctl = value;
+		break;
+	case KVM_REG_ARM_TIMER_CNT:
+		vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+		break;
+	case KVM_REG_ARM_TIMER_CVAL:
+		timer->cntv_cval = value;
+		break;
+	default:
+		return -1;
+	}
+	return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	switch (regid) {
+	case KVM_REG_ARM_TIMER_CTL:
+		return timer->cntv_ctl;
+	case KVM_REG_ARM_TIMER_CNT:
+		return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+	case KVM_REG_ARM_TIMER_CVAL:
+		return timer->cntv_cval;
+	}
+	return (u64)-1;
+}
 
 static int kvm_timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *cpu)

From d90651fa17fcc2112dcfe05832a6c1169d26dee3 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:55 -0700
Subject: [PATCH 115/287] ARM: KVM: Allow creating the VGIC after VCPUs

Rework the VGIC initialization slightly to allow initialization of the
vgic cpu-specific state even if the irqchip (the VGIC) hasn't been
created by user space yet.  This is safe, because the vgic data
structures are already allocated when the CPU is allocated if VGIC
support is compiled into the kernel.  Further, the init process does not
depend on any other information and the sacrifice is a slight
performance degradation for creating VMs in the no-VGIC case.

The reason is that the new device control API doesn't mandate creating
the VGIC before creating the VCPU and it is unreasonable to require user
space to create the VGIC before creating the VCPUs.

At the same time move the irqchip_in_kernel check out of
kvm_vcpu_first_run_init and into the init function to make the per-vcpu
and global init functions symmetric and add comments on the exported
functions making it a bit easier to understand the init flow by only
looking at vgic.c.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e1ba0207a1b3714bb3f000e506285ae5123cdfa7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c  |  7 ++++---
 virt/kvm/arm/vgic.c | 22 +++++++++++++++++++---
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 71ad3a8706d9..ea8da1f4fe49 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -465,6 +465,8 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	if (likely(vcpu->arch.has_run_once))
 		return 0;
 
@@ -474,9 +476,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Initialize the VGIC before running a vcpu the first time on
 	 * this VM.
 	 */
-	if (irqchip_in_kernel(vcpu->kvm) &&
-	    unlikely(!vgic_initialized(vcpu->kvm))) {
-		int ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_initialized(vcpu->kvm))) {
+		ret = kvm_vgic_init(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 81e9481184a7..5e9df47778fb 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1243,15 +1243,19 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+/**
+ * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
+ * @vcpu: pointer to the vcpu struct
+ *
+ * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
+ * this vcpu and enable the VGIC for this VCPU
+ */
 int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
-		return 0;
-
 	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
 		return -EBUSY;
 
@@ -1383,10 +1387,22 @@ int kvm_vgic_hyp_init(void)
 	return ret;
 }
 
+/**
+ * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * @kvm: pointer to the kvm struct
+ *
+ * Map the virtual CPU interface into the VM before running any VCPUs.  We
+ * can't do this at creation time, because user space must first set the
+ * virtual CPU interface address in the guest physical address space.  Also
+ * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ */
 int kvm_vgic_init(struct kvm *kvm)
 {
 	int ret = 0, i;
 
+	if (!irqchip_in_kernel(kvm))
+		return 0;
+
 	mutex_lock(&kvm->lock);
 
 	if (vgic_initialized(kvm))

From 1032acb686aaabafb124e12ae15a6adc9b3da557 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 17:29:18 +0100
Subject: [PATCH 116/287] KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC

Support creating the ARM VGIC device through the KVM_CREATE_DEVICE
ioctl, which can then later be leveraged to use the
KVM_{GET/SET}_DEVICE_ATTR, which is useful both for setting addresses in
a more generic API than the ARM-specific one and is useful for
save/restore of VGIC state.

Adds KVM_CAP_DEVICE_CTRL to ARM capabilities.

Note that we change the check for creating a VGIC from bailing out if
any VCPUs were created, to bailing out if any VCPUs were ever run.  This
is an important distinction that shouldn't break anything, but allows
creating the VGIC after the VCPUs have been created.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7330672befe6269e575f79b924a7068b26c144b4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 .../virtual/kvm/devices/arm-vgic.txt          | 10 +++
 arch/arm/kvm/arm.c                            |  1 +
 include/linux/kvm_host.h                      |  1 +
 include/uapi/linux/kvm.h                      |  1 +
 virt/kvm/arm/vgic.c                           | 63 ++++++++++++++++++-
 virt/kvm/kvm_main.c                           |  5 ++
 6 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/arm-vgic.txt

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..38f27f709a99
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,10 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ea8da1f4fe49..fcb68bb96176 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -191,6 +191,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 		r = vgic_present;
 		break;
+	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9c0f8545df73..1dfc17255ee0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1045,6 +1045,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d5b8501cebfd..56347ab54cf7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -848,6 +848,7 @@ struct kvm_device_attr {
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
+#define KVM_DEV_TYPE_ARM_VGIC_V2	5
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 5e9df47778fb..b15d6c17a090 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1433,20 +1433,45 @@ int kvm_vgic_init(struct kvm *kvm)
 
 int kvm_vgic_create(struct kvm *kvm)
 {
-	int ret = 0;
+	int i, vcpu_lock_idx = -1, ret = 0;
+	struct kvm_vcpu *vcpu;
 
 	mutex_lock(&kvm->lock);
 
-	if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
+	if (kvm->arch.vgic.vctrl_base) {
 		ret = -EEXIST;
 		goto out;
 	}
 
+	/*
+	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
+	 * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
+	 * that no other VCPUs are run while we create the vgic.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!mutex_trylock(&vcpu->mutex))
+			goto out_unlock;
+		vcpu_lock_idx = i;
+	}
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.has_run_once) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+	}
+
 	spin_lock_init(&kvm->arch.vgic.lock);
 	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
+out_unlock:
+	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+		mutex_unlock(&vcpu->mutex);
+	}
+
 out:
 	mutex_unlock(&kvm->lock);
 	return ret;
@@ -1510,3 +1535,37 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
 	mutex_unlock(&kvm->lock);
 	return r;
 }
+
+static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	return -ENXIO;
+}
+
+static void vgic_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+static int vgic_create(struct kvm_device *dev, u32 type)
+{
+	return kvm_vgic_create(dev->kvm);
+}
+
+struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+	.name = "kvm-arm-vgic",
+	.create = vgic_create,
+	.destroy = vgic_destroy,
+	.set_attr = vgic_set_attr,
+	.get_attr = vgic_get_attr,
+	.has_attr = vgic_has_attr,
+};
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cb9a865c8e01..e9a43b6455be 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2271,6 +2271,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_VFIO:
 		ops = &kvm_vfio_ops;
 		break;
+#endif
+#ifdef CONFIG_KVM_ARM_VGIC
+	case KVM_DEV_TYPE_ARM_VGIC_V2:
+		ops = &kvm_arm_vgic_v2_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;

From c4ad31ff7d94f6504c757ddcf742f0597c494080 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:56 -0700
Subject: [PATCH 117/287] KVM: arm-vgic: Set base addr through device API

Support setting the distributor and cpu interface base addresses in the
VM physical address space through the KVM_{SET,GET}_DEVICE_ATTR API
in addition to the ARM specific API.

This has the added benefit of being able to share more code in user
space and do things in a uniform manner.

Also deprecate the older API at the same time, but backwards
compatibility will be maintained.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit ce01e4e8874d410738f4b4733b26642d6611a331)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt             |  8 +-
 .../virtual/kvm/devices/arm-vgic.txt          | 11 +++
 arch/arm/include/uapi/asm/kvm.h               |  2 +
 arch/arm/kvm/arm.c                            |  2 +-
 include/kvm/arm_vgic.h                        |  2 +-
 virt/kvm/arm/vgic.c                           | 87 ++++++++++++++++---
 6 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index d196ebe8956e..3c75a17555a8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2324,7 +2324,7 @@ This ioctl returns the guest registers that are supported for the
 KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
 
 
-4.80 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
 
 Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
 Architectures: arm, arm64
@@ -2362,7 +2362,11 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
 KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
 base addresses will return -EEXIST.
 
-4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
+
+
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
 
 Capability: KVM_CAP_PPC_RTAS
 Architectures: ppc
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 38f27f709a99..c9febb2a0c3e 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -8,3 +8,14 @@ Only one VGIC instance may be instantiated through either this API or the
 legacy KVM_CREATE_IRQCHIP api.  The created VGIC will act as the VM interrupt
 controller, requiring emulated user-space devices to inject interrupts to the
 VGIC instead of directly to CPUs.
+
+Groups:
+  KVM_DEV_ARM_VGIC_GRP_ADDR
+  Attributes:
+    KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+      Base address in the guest physical address space of the GIC distributor
+      register mappings.
+
+    KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+      Base address in the guest physical address space of the GIC virtual cpu
+      interface register mappings.
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 835b8678de03..76a742769e2b 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -163,6 +163,8 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST		0x1009
 #define KVM_REG_ARM_VFP_FPINST2		0x100A
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index fcb68bb96176..e71f6e15d3cd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -785,7 +785,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 	case KVM_ARM_DEVICE_VGIC_V2:
 		if (!vgic_present)
 			return -ENXIO;
-		return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
 	}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7e2d15837b02..be85127bfed3 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -144,7 +144,7 @@ struct kvm_run;
 struct kvm_exit_mmio;
 
 #ifdef CONFIG_KVM_ARM_VGIC
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b15d6c17a090..45db48de4282 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1495,6 +1495,12 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 {
 	int ret;
 
+	if (addr & ~KVM_PHYS_MASK)
+		return -E2BIG;
+
+	if (addr & (SZ_4K - 1))
+		return -EINVAL;
+
 	if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
 		return -EEXIST;
 	if (addr + size < addr)
@@ -1507,26 +1513,41 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 	return ret;
 }
 
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
+/**
+ * kvm_vgic_addr - set or get vgic VM base addresses
+ * @kvm:   pointer to the vm struct
+ * @type:  the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
+ * @addr:  pointer to address value
+ * @write: if true set the address in the VM address space, if false read the
+ *          address
+ *
+ * Set or get the vgic base addresses for the distributor and the virtual CPU
+ * interface in the VM physical address space.  These addresses are properties
+ * of the emulated core/SoC and therefore user space initially knows this
+ * information.
+ */
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 {
 	int r = 0;
 	struct vgic_dist *vgic = &kvm->arch.vgic;
 
-	if (addr & ~KVM_PHYS_MASK)
-		return -E2BIG;
-
-	if (addr & (SZ_4K - 1))
-		return -EINVAL;
-
 	mutex_lock(&kvm->lock);
 	switch (type) {
 	case KVM_VGIC_V2_ADDR_TYPE_DIST:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
-				       addr, KVM_VGIC_V2_DIST_SIZE);
+		if (write) {
+			r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
+					       *addr, KVM_VGIC_V2_DIST_SIZE);
+		} else {
+			*addr = vgic->vgic_dist_base;
+		}
 		break;
 	case KVM_VGIC_V2_ADDR_TYPE_CPU:
-		r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
-				       addr, KVM_VGIC_V2_CPU_SIZE);
+		if (write) {
+			r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
+					       *addr, KVM_VGIC_V2_CPU_SIZE);
+		} else {
+			*addr = vgic->vgic_cpu_base;
+		}
 		break;
 	default:
 		r = -ENODEV;
@@ -1538,16 +1559,58 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
 
 static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
+	int r;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 addr;
+		unsigned long type = (unsigned long)attr->attr;
+
+		if (copy_from_user(&addr, uaddr, sizeof(addr)))
+			return -EFAULT;
+
+		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
+		return (r == -ENODEV) ? -ENXIO : r;
+	}
+	}
+
 	return -ENXIO;
 }
 
 static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
-	return -ENXIO;
+	int r = -ENXIO;
+
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+		u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+		u64 addr;
+		unsigned long type = (unsigned long)attr->attr;
+
+		r = kvm_vgic_addr(dev->kvm, type, &addr, false);
+		if (r)
+			return (r == -ENODEV) ? -ENXIO : r;
+
+		if (copy_to_user(uaddr, &addr, sizeof(addr)))
+			return -EFAULT;
+	}
+	}
+
+	return r;
 }
 
 static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_ADDR:
+		switch (attr->attr) {
+		case KVM_VGIC_V2_ADDR_TYPE_DIST:
+		case KVM_VGIC_V2_ADDR_TYPE_CPU:
+			return 0;
+		}
+		break;
+	}
 	return -ENXIO;
 }
 

From 10b316b72589c7ba6f74c7263a289ffbaa1bf80b Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:56 -0700
Subject: [PATCH 118/287] irqchip: arm-gic: Define additional MMIO offsets and
 masks

Define CPU interface offsets for the GICC_ABPR, GICC_APR, and GICC_IIDR
registers.  Define distributor registers for the GICD_SPENDSGIR and the
GICD_CPENDSGIR.  KVM/ARM needs to know about these definitions to fully
support save/restore of the VGIC.

Also define some masks and shifts for the various GICH_VMCR fields.

Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0307e1770fdeff2732cf7a35d0f7f49db67c6621)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/irqchip/arm-gic.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 3e203eb23cc7..4483adb61c88 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -17,6 +17,9 @@
 #define GIC_CPU_EOI			0x10
 #define GIC_CPU_RUNNINGPRI		0x14
 #define GIC_CPU_HIGHPRI			0x18
+#define GIC_CPU_ALIAS_BINPOINT		0x1c
+#define GIC_CPU_ACTIVEPRIO		0xd0
+#define GIC_CPU_IDENT			0xfc
 
 #define GIC_DIST_CTRL			0x000
 #define GIC_DIST_CTR			0x004
@@ -54,6 +57,15 @@
 #define GICH_LR_ACTIVE_BIT		(1 << 29)
 #define GICH_LR_EOI			(1 << 19)
 
+#define GICH_VMCR_CTRL_SHIFT		0
+#define GICH_VMCR_CTRL_MASK		(0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_PRIMASK_SHIFT		27
+#define GICH_VMCR_PRIMASK_MASK		(0x1f << GICH_VMCR_PRIMASK_SHIFT)
+#define GICH_VMCR_BINPOINT_SHIFT	21
+#define GICH_VMCR_BINPOINT_MASK		(0x7 << GICH_VMCR_BINPOINT_SHIFT)
+#define GICH_VMCR_ALIAS_BINPOINT_SHIFT	18
+#define GICH_VMCR_ALIAS_BINPOINT_MASK	(0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
+
 #define GICH_MISR_EOI			(1 << 0)
 #define GICH_MISR_U			(1 << 1)
 

From 405003b808d4ff3de73eebc18dfb6f1e708626c4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:56 -0700
Subject: [PATCH 119/287] KVM: arm-vgic: Make vgic mmio functions more generic

Rename the vgic_ranges array to vgic_dist_ranges to be more specific and
to prepare for handling CPU interface register access as well (for
save/restore of VGIC state).

Pass offset from distributor or interface MMIO base to
find_matching_range function instead of the physical address of the
access in the VM memory map.  This allows other callers unaware of the
VM specifics, but with generic VGIC knowledge to reuse the function.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1006e8cb22e861260688917ca4cfe6cde8ad69eb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 45db48de4282..e2596f618281 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -602,7 +602,7 @@ struct mmio_range {
 			    phys_addr_t offset);
 };
 
-static const struct mmio_range vgic_ranges[] = {
+static const struct mmio_range vgic_dist_ranges[] = {
 	{
 		.base		= GIC_DIST_CTRL,
 		.len		= 12,
@@ -669,14 +669,13 @@ static const struct mmio_range vgic_ranges[] = {
 static const
 struct mmio_range *find_matching_range(const struct mmio_range *ranges,
 				       struct kvm_exit_mmio *mmio,
-				       phys_addr_t base)
+				       phys_addr_t offset)
 {
 	const struct mmio_range *r = ranges;
-	phys_addr_t addr = mmio->phys_addr - base;
 
 	while (r->len) {
-		if (addr >= r->base &&
-		    (addr + mmio->len) <= (r->base + r->len))
+		if (offset >= r->base &&
+		    (offset + mmio->len) <= (r->base + r->len))
 			return r;
 		r++;
 	}
@@ -713,7 +712,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		return true;
 	}
 
-	range = find_matching_range(vgic_ranges, mmio, base);
+	offset = mmio->phys_addr - base;
+	range = find_matching_range(vgic_dist_ranges, mmio, offset);
 	if (unlikely(!range || !range->handle_mmio)) {
 		pr_warn("Unhandled access %d %08llx %d\n",
 			mmio->is_write, mmio->phys_addr, mmio->len);

From f05c65c6daf2d2c33c31ddc3b48df2693f8e9594 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 11 Dec 2013 20:29:11 -0800
Subject: [PATCH 120/287] arm/arm64: kvm: Set vcpu->cpu to -1 on vcpu_put

The arch-generic KVM code expects the cpu field of a vcpu to be -1 if
the vcpu is no longer assigned to a cpu.  This is used for the optimized
make_all_cpus_request path and will be used by the vgic code to check
that no vcpus are running.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e9b152cb957cb194437f37e79f0f3c9d34fe53d6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e71f6e15d3cd..169c718ddd90 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -343,6 +343,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+	 * if the vcpu is no longer assigned to a cpu.  This is used for the
+	 * optimized make_all_cpus_request path.
+	 */
+	vcpu->cpu = -1;
+
 	kvm_arm_set_running_vcpu(NULL);
 }
 

From 0b3a540dcc4087698c95c6d1bda4071424283345 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 21:17:31 +0100
Subject: [PATCH 121/287] KVM: arm-vgic: Add vgic reg access from dev attr

Add infrastructure to handle distributor and cpu interface register
accesses through the KVM_{GET/SET}_DEVICE_ATTR interface by adding the
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS groups
and defining the semantics of the attr field to be the MMIO offset as
specified in the GICv2 specs.

Missing register accesses or other changes in individual register access
functions to support save/restore of the VGIC state is added in
subsequent patches.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 .../virtual/kvm/devices/arm-vgic.txt          |  52 +++++
 arch/arm/include/uapi/asm/kvm.h               |   6 +
 virt/kvm/arm/vgic.c                           | 178 ++++++++++++++++++
 3 files changed, 236 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index c9febb2a0c3e..7f4e91b1316b 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -19,3 +19,55 @@ Groups:
     KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
       Base address in the guest physical address space of the GIC virtual cpu
       interface register mappings.
+
+  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
+    values:   |    reserved   |   cpu id   |      offset     |
+
+    All distributor regs are (rw, 32-bit)
+
+    The offset is relative to the "Distributor base address" as defined in the
+    GICv2 specs.  Getting or setting such a register has the same effect as
+    reading or writing the register on the actual hardware from the cpu
+    specified with cpu id field.  Note that most distributor fields are not
+    banked, but return the same value regardless of the cpu id used to access
+    the register.
+  Limitations:
+    - Priorities are not implemented, and registers are RAZ/WI
+  Errors:
+    -ENODEV: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
+
+  KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
+    values:   |    reserved   |   cpu id   |      offset     |
+
+    All CPU interface regs are (rw, 32-bit)
+
+    The offset specifies the offset from the "CPU interface base address" as
+    defined in the GICv2 specs.  Getting or setting such a register has the
+    same effect as reading or writing the register on the actual hardware.
+
+    The Active Priorities Registers APRn are implementation defined, so we set a
+    fixed format for our implementation that fits with the model of a "GICv2
+    implementation without the security extensions" which we present to the
+    guest.  This interface always exposes four register APR[0-3] describing the
+    maximum possible 128 preemption levels.  The semantics of the register
+    indicate if any interrupts in a given preemption level are in the active
+    state by setting the corresponding bit.
+
+    Thus, preemption level X has one or more active interrupts if and only if:
+
+      APRn[X mod 32] == 0b1,  where n = X / 32
+
+    Bits for undefined preemption levels are RAZ/WI.
+
+  Limitations:
+    - Priorities are not implemented, and registers are RAZ/WI
+  Errors:
+    -ENODEV: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 76a742769e2b..ef0c8785ba16 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -165,6 +165,12 @@ struct kvm_arch_memory_slot {
 
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e2596f618281..88599b585362 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -589,6 +589,20 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	return false;
+}
+
+static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
+				struct kvm_exit_mmio *mmio,
+				phys_addr_t offset)
+{
+	return false;
+}
+
 /*
  * I would have liked to use the kvm_bus_io_*() API instead, but it
  * cannot cope with banked registers (only the VM pointer is passed
@@ -663,6 +677,16 @@ static const struct mmio_range vgic_dist_ranges[] = {
 		.len		= 4,
 		.handle_mmio	= handle_mmio_sgi_reg,
 	},
+	{
+		.base		= GIC_DIST_SGI_PENDING_CLEAR,
+		.len		= VGIC_NR_SGIS,
+		.handle_mmio	= handle_mmio_sgi_clear,
+	},
+	{
+		.base		= GIC_DIST_SGI_PENDING_SET,
+		.len		= VGIC_NR_SGIS,
+		.handle_mmio	= handle_mmio_sgi_set,
+	},
 	{}
 };
 
@@ -1557,6 +1581,114 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 	return r;
 }
 
+static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
+				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	return true;
+}
+
+static const struct mmio_range vgic_cpu_ranges[] = {
+	{
+		.base		= GIC_CPU_CTRL,
+		.len		= 12,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+	{
+		.base		= GIC_CPU_ALIAS_BINPOINT,
+		.len		= 4,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+	{
+		.base		= GIC_CPU_ACTIVEPRIO,
+		.len		= 16,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+	{
+		.base		= GIC_CPU_IDENT,
+		.len		= 4,
+		.handle_mmio	= handle_cpu_mmio_misc,
+	},
+};
+
+static int vgic_attr_regs_access(struct kvm_device *dev,
+				 struct kvm_device_attr *attr,
+				 u32 *reg, bool is_write)
+{
+	const struct mmio_range *r = NULL, *ranges;
+	phys_addr_t offset;
+	int ret, cpuid, c;
+	struct kvm_vcpu *vcpu, *tmp_vcpu;
+	struct vgic_dist *vgic;
+	struct kvm_exit_mmio mmio;
+
+	offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+	cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
+		KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+
+	mutex_lock(&dev->kvm->lock);
+
+	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+	vgic = &dev->kvm->arch.vgic;
+
+	mmio.len = 4;
+	mmio.is_write = is_write;
+	if (is_write)
+		mmio_data_write(&mmio, ~0, *reg);
+	switch (attr->group) {
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+		mmio.phys_addr = vgic->vgic_dist_base + offset;
+		ranges = vgic_dist_ranges;
+		break;
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		mmio.phys_addr = vgic->vgic_cpu_base + offset;
+		ranges = vgic_cpu_ranges;
+		break;
+	default:
+		BUG();
+	}
+	r = find_matching_range(ranges, &mmio, offset);
+
+	if (unlikely(!r || !r->handle_mmio)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+
+	spin_lock(&vgic->lock);
+
+	/*
+	 * Ensure that no other VCPU is running by checking the vcpu->cpu
+	 * field.  If no other VPCUs are running we can safely access the VGIC
+	 * state, because even if another VPU is run after this point, that
+	 * VCPU will not touch the vgic state, because it will block on
+	 * getting the vgic->lock in kvm_vgic_sync_hwstate().
+	 */
+	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
+		if (unlikely(tmp_vcpu->cpu != -1)) {
+			ret = -EBUSY;
+			goto out_vgic_unlock;
+		}
+	}
+
+	offset -= r->base;
+	r->handle_mmio(vcpu, &mmio, offset);
+
+	if (!is_write)
+		*reg = mmio_data_read(&mmio, ~0);
+
+	ret = 0;
+out_vgic_unlock:
+	spin_unlock(&vgic->lock);
+out:
+	mutex_unlock(&dev->kvm->lock);
+	return ret;
+}
+
 static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
 	int r;
@@ -1573,6 +1705,18 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = kvm_vgic_addr(dev->kvm, type, &addr, true);
 		return (r == -ENODEV) ? -ENXIO : r;
 	}
+
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 reg;
+
+		if (get_user(reg, uaddr))
+			return -EFAULT;
+
+		return vgic_attr_regs_access(dev, attr, &reg, true);
+	}
+
 	}
 
 	return -ENXIO;
@@ -1594,14 +1738,42 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		if (copy_to_user(uaddr, &addr, sizeof(addr)))
 			return -EFAULT;
+		break;
 	}
+
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 reg = 0;
+
+		r = vgic_attr_regs_access(dev, attr, &reg, false);
+		if (r)
+			return r;
+		r = put_user(reg, uaddr);
+		break;
+	}
+
 	}
 
 	return r;
 }
 
+static int vgic_has_attr_regs(const struct mmio_range *ranges,
+			      phys_addr_t offset)
+{
+	struct kvm_exit_mmio dev_attr_mmio;
+
+	dev_attr_mmio.len = 4;
+	if (find_matching_range(ranges, &dev_attr_mmio, offset))
+		return 0;
+	else
+		return -ENXIO;
+}
+
 static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
+	phys_addr_t offset;
+
 	switch (attr->group) {
 	case KVM_DEV_ARM_VGIC_GRP_ADDR:
 		switch (attr->attr) {
@@ -1610,6 +1782,12 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 			return 0;
 		}
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+		return vgic_has_attr_regs(vgic_dist_ranges, offset);
+	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
 	}
 	return -ENXIO;
 }

From 944b8a6f9f400ded36fcdc61acd801fbd7413985 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 15 Nov 2013 20:51:31 -0800
Subject: [PATCH 122/287] KVM: arm-vgic: Support unqueueing of LRs to the dist

To properly access the VGIC state from user space it is very unpractical
to have to loop through all the LRs in all register access functions.
Instead, support moving all pending state from LRs to the distributor,
but leave active state LRs alone.

Note that to accurately present the active and pending state to VCPUs
reading these distributor registers from a live VM, we would have to
stop all other VPUs than the calling VCPU and ask each CPU to unqueue
their LR state onto the distributor and add fields to track active state
on the distributor side as well.  We don't have any users of such
functionality yet and there are other inaccuracies of the GIC emulation,
so don't provide accurate synchronized access to this state just yet.
However, when the time comes, having this function should help.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit cbd333a4bfd0d93bba36d46a0e4e7979228873a6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 88 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 83 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 88599b585362..d08ba28e729a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -589,6 +589,80 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+#define LR_CPUID(lr)	\
+	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
+#define LR_IRQID(lr)	\
+	((lr) & GICH_LR_VIRTUALID)
+
+static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
+{
+	clear_bit(lr_nr, vgic_cpu->lr_used);
+	vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
+	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
+
+/**
+ * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
+ *
+ * Move any pending IRQs that have already been assigned to LRs back to the
+ * emulated distributor state so that the complete emulated state can be read
+ * from the main emulation structures without investigating the LRs.
+ *
+ * Note that IRQs in the active state in the LRs get their pending state moved
+ * to the distributor but the active state stays in the LRs, because we don't
+ * track the active state on the distributor side.
+ */
+static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	int vcpu_id = vcpu->vcpu_id;
+	int i, irq, source_cpu;
+	u32 *lr;
+
+	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+		lr = &vgic_cpu->vgic_lr[i];
+		irq = LR_IRQID(*lr);
+		source_cpu = LR_CPUID(*lr);
+
+		/*
+		 * There are three options for the state bits:
+		 *
+		 * 01: pending
+		 * 10: active
+		 * 11: pending and active
+		 *
+		 * If the LR holds only an active interrupt (not pending) then
+		 * just leave it alone.
+		 */
+		if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+			continue;
+
+		/*
+		 * Reestablish the pending state on the distributor and the
+		 * CPU interface.  It may have already been pending, but that
+		 * is fine, then we are only setting a few bits that were
+		 * already set.
+		 */
+		vgic_dist_irq_set(vcpu, irq);
+		if (irq < VGIC_NR_SGIS)
+			dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
+		*lr &= ~GICH_LR_PENDING_BIT;
+
+		/*
+		 * If there's no state left on the LR (it could still be
+		 * active), then the LR does not hold any useful info and can
+		 * be marked as free for other use.
+		 */
+		if (!(*lr & GICH_LR_STATE))
+			vgic_retire_lr(i, irq, vgic_cpu);
+
+		/* Finally update the VGIC state. */
+		vgic_update_state(vcpu->kvm);
+	}
+}
+
 static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
 				  struct kvm_exit_mmio *mmio,
 				  phys_addr_t offset)
@@ -848,8 +922,6 @@ static void vgic_update_state(struct kvm *kvm)
 	}
 }
 
-#define LR_CPUID(lr)	\
-	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
 #define MK_LR_PEND(src, irq)	\
 	(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
 
@@ -871,9 +943,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 		int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 		if (!vgic_irq_is_enabled(vcpu, irq)) {
-			vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-			clear_bit(lr, vgic_cpu->lr_used);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
+			vgic_retire_lr(lr, irq, vgic_cpu);
 			if (vgic_irq_is_active(vcpu, irq))
 				vgic_irq_clear_active(vcpu, irq);
 		}
@@ -1675,6 +1745,14 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 		}
 	}
 
+	/*
+	 * Move all pending IRQs from the LRs on all VCPUs so the pending
+	 * state can be properly represented in the register state accessible
+	 * through this API.
+	 */
+	kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
+		vgic_unqueue_irqs(tmp_vcpu);
+
 	offset -= r->base;
 	r->handle_mmio(vcpu, &mmio, offset);
 

From 8a280d12e7988c920e8805de50c0a7e129032d1b Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 21:22:31 +0100
Subject: [PATCH 123/287] KVM: arm-vgic: Add GICD_SPENDSGIR and GICD_CPENDSGIR
 handlers

Handle MMIO accesses to the two registers which should support both the
case where the VMs want to read/write either of these registers and the
case where user space reads/writes these registers to do save/restore of
the VGIC state.

Note that the added complexity compared to simple set/clear enable
registers stems from the bookkeping of source cpu ids.  It may be
possible to change the underlying data structure to simplify the
complexity, but since this is not in the critical path at all, this will
do.

Also note that reading this register from a live guest will not be
accurate compared to on hardware, because some state may be living on
the CPU LRs and the only way to give a consistent read would be to force
stop all the VCPUs and request them to unqueu the LR state onto the
distributor.  Until we have an actual user of live reading this
register, we can live with the difference.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 90a5355ee7639e92c0492ec592cba5c31bd80687)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 70 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 66 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index d08ba28e729a..e59aaa4c64e5 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -663,18 +663,80 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 	}
 }
 
-static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
-				  struct kvm_exit_mmio *mmio,
-				  phys_addr_t offset)
+/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
+static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+					struct kvm_exit_mmio *mmio,
+					phys_addr_t offset)
 {
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int sgi;
+	int min_sgi = (offset & ~0x3) * 4;
+	int max_sgi = min_sgi + 3;
+	int vcpu_id = vcpu->vcpu_id;
+	u32 reg = 0;
+
+	/* Copy source SGIs from distributor side */
+	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+		int shift = 8 * (sgi - min_sgi);
+		reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+	}
+
+	mmio_data_write(mmio, ~0, reg);
 	return false;
 }
 
+static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+					 struct kvm_exit_mmio *mmio,
+					 phys_addr_t offset, bool set)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	int sgi;
+	int min_sgi = (offset & ~0x3) * 4;
+	int max_sgi = min_sgi + 3;
+	int vcpu_id = vcpu->vcpu_id;
+	u32 reg;
+	bool updated = false;
+
+	reg = mmio_data_read(mmio, ~0);
+
+	/* Clear pending SGIs on the distributor */
+	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+		u8 mask = reg >> (8 * (sgi - min_sgi));
+		if (set) {
+			if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+				updated = true;
+			dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+		} else {
+			if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+				updated = true;
+			dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+		}
+	}
+
+	if (updated)
+		vgic_update_state(vcpu->kvm);
+
+	return updated;
+}
+
 static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
 				struct kvm_exit_mmio *mmio,
 				phys_addr_t offset)
 {
-	return false;
+	if (!mmio->is_write)
+		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+	else
+		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
+}
+
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	if (!mmio->is_write)
+		return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+	else
+		return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
 }
 
 /*

From a0a11ba68172ffeb62b0a7be8dc4162a0fc2ad6d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:57 -0700
Subject: [PATCH 124/287] KVM: arm-vgic: Support CPU interface reg access

Implement support for the CPU interface register access driven by MMIO
address offsets from the CPU interface base address.  Useful for user
space to support save/restore of the VGIC state.

This commit adds support only for the same logic as the current VGIC
support, and no more.  For example, the active priority registers are
handled as RAZ/WI, just like setting priorities on the emulated
distributor.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit fa20f5aea56f271f83e91b9cde00f043a5a14990)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 81 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 8 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e59aaa4c64e5..be456ce264d0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -71,6 +71,10 @@
 #define VGIC_ADDR_UNDEF		(-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
+#define PRODUCT_ID_KVM		0x4b	/* ASCII code K */
+#define IMPLEMENTER_ARM		0x43b
+#define GICC_ARCH_VERSION_V2	0x2
+
 /* Physical address of vgic virtual cpu interface */
 static phys_addr_t vgic_vcpu_base;
 
@@ -312,7 +316,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 	u32 word_offset = offset & 3;
 
 	switch (offset & ~3) {
-	case 0:			/* CTLR */
+	case 0:			/* GICD_CTLR */
 		reg = vcpu->kvm->arch.vgic.enabled;
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
@@ -323,15 +327,15 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 		}
 		break;
 
-	case 4:			/* TYPER */
+	case 4:			/* GICD_TYPER */
 		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 		reg |= (VGIC_NR_IRQS >> 5) - 1;
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 		break;
 
-	case 8:			/* IIDR */
-		reg = 0x4B00043B;
+	case 8:			/* GICD_IIDR */
+		reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 		break;
@@ -1716,9 +1720,70 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
 				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
-	return true;
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u32 reg, mask = 0, shift = 0;
+	bool updated = false;
+
+	switch (offset & ~0x3) {
+	case GIC_CPU_CTRL:
+		mask = GICH_VMCR_CTRL_MASK;
+		shift = GICH_VMCR_CTRL_SHIFT;
+		break;
+	case GIC_CPU_PRIMASK:
+		mask = GICH_VMCR_PRIMASK_MASK;
+		shift = GICH_VMCR_PRIMASK_SHIFT;
+		break;
+	case GIC_CPU_BINPOINT:
+		mask = GICH_VMCR_BINPOINT_MASK;
+		shift = GICH_VMCR_BINPOINT_SHIFT;
+		break;
+	case GIC_CPU_ALIAS_BINPOINT:
+		mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
+		shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+		break;
+	}
+
+	if (!mmio->is_write) {
+		reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
+		mmio_data_write(mmio, ~0, reg);
+	} else {
+		reg = mmio_data_read(mmio, ~0);
+		reg = (reg << shift) & mask;
+		if (reg != (vgic_cpu->vgic_vmcr & mask))
+			updated = true;
+		vgic_cpu->vgic_vmcr &= ~mask;
+		vgic_cpu->vgic_vmcr |= reg;
+	}
+	return updated;
 }
 
+static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
+			     struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+	return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
+}
+
+static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
+				  struct kvm_exit_mmio *mmio,
+				  phys_addr_t offset)
+{
+	u32 reg;
+
+	if (mmio->is_write)
+		return false;
+
+	/* GICC_IIDR */
+	reg = (PRODUCT_ID_KVM << 20) |
+	      (GICC_ARCH_VERSION_V2 << 16) |
+	      (IMPLEMENTER_ARM << 0);
+	mmio_data_write(mmio, ~0, reg);
+	return false;
+}
+
+/*
+ * CPU Interface Register accesses - these are not accessed by the VM, but by
+ * user space for saving and restoring VGIC state.
+ */
 static const struct mmio_range vgic_cpu_ranges[] = {
 	{
 		.base		= GIC_CPU_CTRL,
@@ -1728,17 +1793,17 @@ static const struct mmio_range vgic_cpu_ranges[] = {
 	{
 		.base		= GIC_CPU_ALIAS_BINPOINT,
 		.len		= 4,
-		.handle_mmio	= handle_cpu_mmio_misc,
+		.handle_mmio	= handle_mmio_abpr,
 	},
 	{
 		.base		= GIC_CPU_ACTIVEPRIO,
 		.len		= 16,
-		.handle_mmio	= handle_cpu_mmio_misc,
+		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_CPU_IDENT,
 		.len		= 4,
-		.handle_mmio	= handle_cpu_mmio_misc,
+		.handle_mmio	= handle_cpu_mmio_ident,
 	},
 };
 

From c23fe6933f79809bae75b5e8c16c36d1ffc79e41 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 12 Dec 2013 16:12:22 +0000
Subject: [PATCH 125/287] arm64: KVM: Add Kconfig option for max VCPUs
 per-Guest

Current max VCPUs per-Guest is set to 4 which is preventing
us from creating a Guest (or VM) with 8 VCPUs on Host (e.g.
X-Gene Storm SOC) with 8 Host CPUs.

The correct value of max VCPUs per-Guest should be same as
the max CPUs supported by GICv2 which is 8 but, increasing
value of max VCPUs per-Guest can make things slower hence
we add Kconfig option to let KVM users select appropriate
max VCPUs per-Guest.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit da7814700a0c408bead58ce4714b7625ffbaade1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  7 ++++++-
 arch/arm64/kvm/Kconfig            | 11 +++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5d85a02d1231..0a1d69751562 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -26,7 +26,12 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
-#define KVM_MAX_VCPUS 4
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
+#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4480ab339a00..8ba85e9ea388 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,17 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+config KVM_ARM_MAX_VCPUS
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
+	help
+	  Static number of max supported virtual CPUs per VM.
+
+	  If you choose a high number, the vcpu structures will be quite
+	  large, so only choose a reasonable number that you expect to
+	  actually use.
+
 config KVM_ARM_VGIC
 	bool
 	depends on KVM_ARM_HOST && OF

From af4604a0f9c909b60d043cebb20110804639460e Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 14 Nov 2013 15:20:08 +0000
Subject: [PATCH 126/287] arm64: KVM: Support X-Gene guest VCPU on APM X-Gene
 host

This patch allows us to have X-Gene guest VCPU when using KVM arm64
on APM X-Gene host.

We add KVM_ARM_TARGET_XGENE_POTENZA for X-Gene Potenza compatible
guest VCPU and we return KVM_ARM_TARGET_XGENE_POTENZA in kvm_target_cpu()
when running on X-Gene host with Potenza core.

[maz: sanitized the commit log]

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e28100bd8ed9e37b7cd4578140a1e7f95bd40835)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/uapi/asm/kvm.h    |  3 ++-
 arch/arm64/kvm/guest.c               | 32 +++++++++++++++++-----------
 arch/arm64/kvm/sys_regs_generic_v8.c |  3 +++
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 7c25ca8b02b3..495ab6f84a61 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -55,8 +55,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_AEM_V8		0
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
+#define KVM_ARM_TARGET_XGENE_POTENZA	3
 
-#define KVM_ARM_NUM_TARGETS		3
+#define KVM_ARM_NUM_TARGETS		4
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3f0731e53274..08745578d54d 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void)
 	unsigned long implementor = read_cpuid_implementor();
 	unsigned long part_number = read_cpuid_part_number();
 
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
+	switch (implementor) {
+	case ARM_CPU_IMP_ARM:
+		switch (part_number) {
+		case ARM_CPU_PART_AEM_V8:
+			return KVM_ARM_TARGET_AEM_V8;
+		case ARM_CPU_PART_FOUNDATION:
+			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A57:
+			return KVM_ARM_TARGET_CORTEX_A57;
+		};
+		break;
+	case ARM_CPU_IMP_APM:
+		switch (part_number) {
+		case APM_CPU_PART_POTENZA:
+			return KVM_ARM_TARGET_XGENE_POTENZA;
+		};
+		break;
+	};
 
-	switch (part_number) {
-	case ARM_CPU_PART_AEM_V8:
-		return KVM_ARM_TARGET_AEM_V8;
-	case ARM_CPU_PART_FOUNDATION:
-		return KVM_ARM_TARGET_FOUNDATION_V8;
-	case ARM_CPU_PART_CORTEX_A57:
-		/* Currently handled by the generic backend */
-		return KVM_ARM_TARGET_CORTEX_A57;
-	default:
-		return -EINVAL;
-	}
+	return -EINVAL;
 }
 
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 4268ab9356b1..8fe6f76b0edc 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+					  &genericv8_target_table);
+
 	return 0;
 }
 late_initcall(sys_reg_genericv8_init);

From dfbd506266a97ad252336ea96e41791fdd0c0828 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 12 Dec 2013 16:12:23 +0000
Subject: [PATCH 127/287] arm64: KVM: Force undefined exception for Guest SMC
 intructions

The SMC-based PSCI emulation for Guest is going to be very different
from the in-kernel HVC-based PSCI emulation hence for now just inject
undefined exception when Guest executes SMC instruction.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e5cf9dcdbfd26cd4e1991db08755da900454efeb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/handle_exit.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 42a0f1bddfe7..7bc41eab4c64 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }

From 2aad15258f0795432073b14b616a634e452432f0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Sun, 29 Dec 2013 12:12:29 -0800
Subject: [PATCH 128/287] kvm: make local functions static

Running 'make namespacecheck' found lots of functions that
should be declared static, since only used in one file.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
(cherry picked from commit 7940876e1330671708186ac3386aa521ffb5c182)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 16 ----------------
 virt/kvm/ioapic.c        |  2 +-
 virt/kvm/ioapic.h        |  1 -
 virt/kvm/kvm_main.c      | 35 ++++++++++++++++++-----------------
 4 files changed, 19 insertions(+), 35 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1dfc17255ee0..95625d5cb3f0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -449,8 +449,6 @@ void kvm_exit(void);
 
 void kvm_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
-		     u64 last_generation);
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
 {
@@ -523,7 +521,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
-void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
@@ -535,7 +532,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
 
-void kvm_release_pfn_dirty(pfn_t pfn);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
@@ -562,8 +558,6 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
-			     gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
@@ -590,8 +584,6 @@ int kvm_get_dirty_log(struct kvm *kvm,
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 				struct kvm_dirty_log *log);
 
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem);
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 			bool line_status);
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -639,8 +631,6 @@ void kvm_arch_check_processor_compat(void *rtn);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
-void kvm_free_physmem(struct kvm *kvm);
-
 void *kvm_kvzalloc(unsigned long size);
 void kvm_kvfree(const void *addr);
 
@@ -1067,12 +1057,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
 static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
 {
 }
-
-static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
-{
-	return true;
-}
-
 #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
 #endif
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 2d682977ce82..ce9ed99ad7dc 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -520,7 +520,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 	return 0;
 }
 
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 {
 	int i;
 
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 615d8c995c3c..90d43e95dcf8 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 		       int level, bool line_status);
 void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e9a43b6455be..2162bd5d17d7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -95,6 +95,12 @@ static int hardware_enable_all(void);
 static void hardware_disable_all(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+static void update_memslots(struct kvm_memslots *slots,
+			    struct kvm_memory_slot *new, u64 last_generation);
+
+static void kvm_release_pfn_dirty(pfn_t pfn);
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_memory_slot *memslot, gfn_t gfn);
 
 bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
@@ -552,7 +558,7 @@ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
 	free->npages = 0;
 }
 
-void kvm_free_physmem(struct kvm *kvm)
+static void kvm_free_physmem(struct kvm *kvm)
 {
 	struct kvm_memslots *slots = kvm->memslots;
 	struct kvm_memory_slot *memslot;
@@ -674,8 +680,9 @@ static void sort_memslots(struct kvm_memslots *slots)
 		slots->id_to_index[slots->memslots[i].id] = i;
 }
 
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
-		     u64 last_generation)
+static void update_memslots(struct kvm_memslots *slots,
+			    struct kvm_memory_slot *new,
+			    u64 last_generation)
 {
 	if (new) {
 		int id = new->id;
@@ -923,8 +930,8 @@ int kvm_set_memory_region(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
 
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem)
+static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+					  struct kvm_userspace_memory_region *mem)
 {
 	if (mem->slot >= KVM_USER_MEM_SLOTS)
 		return -EINVAL;
@@ -1045,7 +1052,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
 }
 
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
-				 gfn_t gfn)
+					gfn_t gfn)
 {
 	return gfn_to_hva_many(slot, gfn, NULL);
 }
@@ -1385,18 +1392,11 @@ void kvm_release_page_dirty(struct page *page)
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
 
-void kvm_release_pfn_dirty(pfn_t pfn)
+static void kvm_release_pfn_dirty(pfn_t pfn)
 {
 	kvm_set_pfn_dirty(pfn);
 	kvm_release_pfn_clean(pfn);
 }
-EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
-
-void kvm_set_page_dirty(struct page *page)
-{
-	kvm_set_pfn_dirty(page_to_pfn(page));
-}
-EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
@@ -1638,8 +1638,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
-			     gfn_t gfn)
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+				    struct kvm_memory_slot *memslot,
+				    gfn_t gfn)
 {
 	if (memslot && memslot->dirty_bitmap) {
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
@@ -1753,7 +1754,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
  *  locking does not harm. It may result in trying to yield to  same VCPU, fail
  *  and continue with next VCPU and so on.
  */
-bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 {
 	bool eligible;
 

From c9dad332f97620c9221a724e20d2bc4db51bb7a0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 13 Dec 2013 16:56:06 +0000
Subject: [PATCH 129/287] arm/arm64: KVM: relax the requirements of VMA
 alignment for THP

The THP code in KVM/ARM is a bit restrictive in not allowing a THP
to be used if the VMA is not 2MB aligned. Actually, it is not so much
the VMA that matters, but the associated memslot:

A process can perfectly mmap a region with no particular alignment
restriction, and then pass a 2MB aligned address to KVM. In this
case, KVM will only use this 2MB aligned region, and will ignore
the range between vma->vm_start and memslot->userspace_addr.

It can also choose to place this memslot at whatever alignment it
wants in the IPA space. In the end, what matters is the relative
alignment of the user space and IPA mappings with respect to a
2M page. They absolutely must be the same if you want to use THP.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 136d737fd20102f1be9b02356590fd55e3a40d0e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 659db0ed1370..7789857d1470 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
 		/*
-		 * Pages belonging to VMAs not aligned to the PMD mapping
-		 * granularity cannot be mapped using block descriptors even
-		 * if the pages belong to a THP for the process, because the
-		 * stage-2 block descriptor will cover more than a single THP
-		 * and we loose atomicity for unmapping, updates, and splits
-		 * of the THP or other pages in the stage-2 block range.
+		 * Pages belonging to memslots that don't have the same
+		 * alignment for userspace and IPA cannot be mapped using
+		 * block descriptors even if the pages belong to a THP for
+		 * the process, because the stage-2 block descriptor will
+		 * cover more than a single THP and we loose atomicity for
+		 * unmapping, updates, and splits of the THP or other pages
+		 * in the stage-2 block range.
 		 */
-		if (vma->vm_start & ~PMD_MASK)
+		if ((memslot->userspace_addr & ~PMD_MASK) !=
+		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
 			force_pte = true;
 	}
 	up_read(&current->mm->mmap_sem);

From 3b254a9515ba76de3746f4c28041845ba27b3745 Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Tue, 7 Jan 2014 13:45:15 +0530
Subject: [PATCH 130/287] KVM: ARM: Remove duplicate include

trace.h was included twice. Remove duplicate inclusion.

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 61466710de078c697106fa5b70ec7afc9feab520)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/handle_exit.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a92079011a83..0de91fc6de0f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
 
 #include "trace.h"
 
-#include "trace.h"
-
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)

From 328c366d43fda9d85c758c455b5f522747fa3d7e Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Thu, 9 Jan 2014 18:43:16 -0600
Subject: [PATCH 131/287] kvm: Provide kvm_vcpu_eligible_for_directed_yield()
 stub

Commit 7940876e1330671708186ac3386aa521ffb5c182 ("kvm: make local
functions static") broke KVM PPC builds due to removing (rather than
moving) the stub version of kvm_vcpu_eligible_for_directed_yield().

This patch reintroduces it.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Alexander Graf <agraf@suse.de>
[Move the #ifdef inside the function. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 4a55dd7273c95b4a19fbcf0ae1bbd1cfd09dfc36)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2162bd5d17d7..2d57b23683a4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1731,7 +1731,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
 
-#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 /*
  * Helper that checks whether a VCPU is eligible for directed yield.
  * Most eligible candidate to yield is decided by following heuristics:
@@ -1756,6 +1755,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
  */
 static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 {
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 	bool eligible;
 
 	eligible = !vcpu->spin_loop.in_spin_loop ||
@@ -1766,8 +1766,10 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 		kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
 
 	return eligible;
-}
+#else
+	return true;
 #endif
+}
 
 void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 {

From 031cbab9d6d2a6beca9e19472a40f5e22ecfbe07 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 29 Jan 2014 16:16:39 +0300
Subject: [PATCH 132/287] KVM: return an error code in
 kvm_vm_ioctl_register_coalesced_mmio()

If kvm_io_bus_register_dev() fails then it returns success but it should
return an error code.

I also did a little cleanup like removing an impossible NULL test.

Cc: stable@vger.kernel.org
Fixes: 2b3c246a682c ('KVM: Make coalesced mmio use a device per zone')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit aac5c4226e7136c331ed384c25d5560204da10a0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/coalesced_mmio.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 88b2fe3ddf42..00d86427af0f 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -154,17 +154,13 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	list_add_tail(&dev->list, &kvm->coalesced_zones);
 	mutex_unlock(&kvm->slots_lock);
 
-	return ret;
+	return 0;
 
 out_free_dev:
 	mutex_unlock(&kvm->slots_lock);
-
 	kfree(dev);
 
-	if (dev == NULL)
-		return -ENXIO;
-
-	return 0;
+	return ret;
 }
 
 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,

From e9aa8f627c7c504c3880acdfa57849bd3d75dc83 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Thu, 6 Jun 2013 15:32:37 +0200
Subject: [PATCH 133/287] KVM: async_pf: Provide additional direct page
 notification

By setting a Kconfig option, the architecture can control when
guest notifications will be presented by the apf backend.
There is the default batch mechanism, working as before, where the vcpu
thread should pull in this information.
Opposite to this, there is now the direct mechanism, that will push the
information to the guest.
This way s390 can use an already existing architecture interface.

Still the vcpu thread should call check_completion to cleanup leftovers.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit e0ead41a6dac09f86675ce07a66e4b253a9b7bd5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/x86/kvm/mmu.c       |  2 +-
 include/linux/kvm_host.h |  2 +-
 virt/kvm/Kconfig         |  4 ++++
 virt/kvm/async_pf.c      | 20 ++++++++++++++++++--
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3c1877bbfe6a..f47d2e11108e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3232,7 +3232,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 	arch.direct_map = vcpu->arch.mmu.direct_map;
 	arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
 
-	return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+	return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
 }
 
 static bool can_do_async_pf(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 95625d5cb3f0..f6801d10e04c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -180,7 +180,7 @@ struct kvm_async_pf {
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
 void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 		       struct kvm_arch_async_pf *arch);
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48bd629..13f2d19793e3 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
 config KVM_ASYNC_PF
        bool
 
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+       bool
+
 config HAVE_KVM_MSI
        bool
 
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c14320..00980ab02c45 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
 #include "async_pf.h"
 #include <trace/events/kvm.h>
 
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+					       struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+						struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+	kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
 static struct kmem_cache *async_pf_cache;
 
 int kvm_async_pf_init(void)
@@ -69,6 +84,7 @@ static void async_pf_execute(struct work_struct *work)
 	down_read(&mm->mmap_sem);
 	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
+	kvm_async_page_present_sync(vcpu, apf);
 	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
@@ -138,7 +154,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 	}
 }
 
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 		       struct kvm_arch_async_pf *arch)
 {
 	struct kvm_async_pf *work;
@@ -159,7 +175,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 	work->wakeup_all = false;
 	work->vcpu = vcpu;
 	work->gva = gva;
-	work->addr = gfn_to_hva(vcpu->kvm, gfn);
+	work->addr = hva;
 	work->arch = *arch;
 	work->mm = current->mm;
 	atomic_inc(&work->mm->mm_count);

From 65d762a03f29316956725f348546794de0160fcc Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Tue, 3 Sep 2013 12:31:16 +0200
Subject: [PATCH 134/287] KVM: async_pf: Allow to wait for outstanding work

On s390 we are not able to cancel work. Instead we will flush the work and wait for
completion.

Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit 9f2ceda49c6b8827c795731c204f6c2587886e2c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 00980ab02c45..889aad022014 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -113,11 +113,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.queue.next,
 				   typeof(*work), queue);
 		list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+		flush_work(&work->work);
+#else
 		if (cancel_work_sync(&work->work)) {
 			mmdrop(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
 		}
+#endif
 	}
 
 	spin_lock(&vcpu->async_pf.lock);

From c76f5b54ec88ab03043cdd2ab2c148daa0bccd7c Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Fri, 31 Jan 2014 14:32:46 +0100
Subject: [PATCH 135/287] KVM: async_pf: Add missing call for async page
 present

Commit KVM: async_pf: Provide additional direct page notification
missed the call from kvm_check_async_pf_completion to the new introduced function.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 1179ba539541347d5427cde8bcfdaa5ead14f3aa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 889aad022014..10df100c4514 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -151,7 +151,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 		spin_unlock(&vcpu->async_pf.lock);
 
 		kvm_arch_async_page_ready(vcpu, work);
-		kvm_arch_async_page_present(vcpu, work);
+		kvm_async_page_present_async(vcpu, work);
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;

From 8cf654c3d7b883a4ffc64e6301b4af5a9f3b6b93 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 5 Feb 2014 10:24:12 +0000
Subject: [PATCH 136/287] arm64: fix typo: s/SERRROR/SERROR/

Somehow SERROR has acquired an additional 'R' in a couple of headers.
This patch removes them before they spread further. As neither instance
is in use yet, no other sites need to be fixed up.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit bfb67a5606376bb32cb6f93dc05cda2e8c2038a5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index c98ef4771c73..0eb398655378 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -231,7 +231,7 @@
 #define ESR_EL2_EC_SP_ALIGN	(0x26)
 #define ESR_EL2_EC_FP_EXC32	(0x28)
 #define ESR_EL2_EC_FP_EXC64	(0x2C)
-#define ESR_EL2_EC_SERRROR	(0x2F)
+#define ESR_EL2_EC_SERROR	(0x2F)
 #define ESR_EL2_EC_BREAKPT	(0x30)
 #define ESR_EL2_EC_BREAKPT_HYP	(0x31)
 #define ESR_EL2_EC_SOFTSTP	(0x32)

From 4635cac7cb81e457a59eb1cbea32d828f6c25a05 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Sat, 8 Feb 2014 08:51:57 +0100
Subject: [PATCH 137/287] asmlinkage, kvm: Make kvm_rebooting visible

kvm_rebooting is referenced from assembler code, thus
needs to be visible.

Cc: Gleb Natapov <gleb@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/1391845930-28580-1-git-send-email-ak@linux.intel.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
(cherry picked from commit 52480137d82062bb8d0fb778cb9934667958e367)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2d57b23683a4..88660cbe70a4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn);
 static void mark_page_dirty_in_slot(struct kvm *kvm,
 				    struct kvm_memory_slot *memslot, gfn_t gfn);
 
-bool kvm_rebooting;
+__visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;

From 312c49473d6543ad3888172fc63992e5a326c622 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sun, 2 Feb 2014 13:41:02 -0800
Subject: [PATCH 138/287] arm64: KVM: Add VGIC device control for arm64

This fixes the build breakage introduced by
c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f and adds support for the device
control API and save/restore of the VGIC state for ARMv8.

The defines were simply missing from the arm64 header files and
uaccess.h must be implicitly imported from somewhere else on arm.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 2a2f3e269c75edf916de5967079069aeb6a601cb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/uapi/asm/kvm.h | 9 +++++++++
 virt/kvm/arm/vgic.c               | 1 +
 2 files changed, 10 insertions(+)

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 495ab6f84a61..eaf54a30bedc 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -148,6 +148,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
 #define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index be456ce264d0..8ca405cd7c1a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -24,6 +24,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/uaccess.h>
 
 #include <linux/irqchip/arm-gic.h>
 

From 2aaea8b6ef1ba1dd5de6646abf86a4bb2a3a0642 Mon Sep 17 00:00:00 2001
From: Michael Mueller <mimu@linux.vnet.ibm.com>
Date: Wed, 26 Feb 2014 16:14:18 +0100
Subject: [PATCH 139/287] KVM: add kvm_arch_vcpu_runnable() test to
 kvm_vcpu_on_spin() loop

Use the arch specific function kvm_arch_vcpu_runnable() to add a further
criterium to identify a suitable vcpu to yield to during undirected yield
processing.

Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 98f4a14676127397c54cab7d6119537ed4d113a2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 88660cbe70a4..9f5fab0a4fda 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1800,7 +1800,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 				continue;
 			if (vcpu == me)
 				continue;
-			if (waitqueue_active(&vcpu->wq))
+			if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 				continue;
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;

From 24e14ecb92fafddb1f3c9cce1b4aed3596f86859 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 26 Feb 2014 18:47:36 +0000
Subject: [PATCH 140/287] arm/arm64: KVM: detect CPU reset on CPU_PM_EXIT

Commit 1fcf7ce0c602 (arm: kvm: implement CPU PM notifier) added
support for CPU power-management, using a cpu_notifier to re-init
KVM on a CPU that entered CPU idle.

The code assumed that a CPU entering idle would actually be powered
off, loosing its state entierely, and would then need to be
reinitialized. It turns out that this is not always the case, and
some HW performs CPU PM without actually killing the core. In this
case, we try to reinitialize KVM while it is still live. It ends up
badly, as reported by Andre Przywara (using a Calxeda Midway):

[    3.663897] Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x685760
[    3.663897] unexpected data abort in Hyp mode at: 0xc067d150
[    3.663897] unexpected HVC/SVC trap in Hyp mode at: 0xc0901dd0

The trick here is to detect if we've been through a full re-init or
not by looking at HVBAR (VBAR_EL2 on arm64). This involves
implementing the backend for __hyp_get_vectors in the main KVM HYP
code (rather small), and checking the return value against the
default one when the CPU notifier is called on CPU_PM_EXIT.

Reported-by: Andre Przywara <osp@andrep.de>
Tested-by: Andre Przywara <osp@andrep.de>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Rob Herring <rob.herring@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b20c9f29c5c25921c6ad18b50d4b61e6d181c3cc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c        |  3 ++-
 arch/arm/kvm/interrupts.S | 11 ++++++++++-
 arch/arm64/kvm/hyp.S      | 27 +++++++++++++++++++++++++--
 3 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 169c718ddd90..9804406ff37e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -878,7 +878,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
 				    unsigned long cmd,
 				    void *v)
 {
-	if (cmd == CPU_PM_EXIT) {
+	if (cmd == CPU_PM_EXIT &&
+	    __hyp_get_vectors() == hyp_default_vectors) {
 		cpu_init_hyp_mode(NULL);
 		return NOTIFY_OK;
 	}
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index ddc15539bad2..0d68d4073068 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -220,6 +220,10 @@ after_vfp_restore:
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
  * passed in r0 and r1.
  *
+ * A function pointer with a value of 0xffffffff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
+ * arch/arm/kernel/hyp_stub.S.
+ *
  * The calling convention follows the standard AAPCS:
  *   r0 - r3: caller save
  *   r12:     caller save
@@ -363,6 +367,11 @@ hyp_hvc:
 host_switch_to_hyp:
 	pop	{r0, r1, r2}
 
+	/* Check for __hyp_get_vectors */
+	cmp	r0, #-1
+	mrceq	p15, 4, r0, c12, c0, 0	@ get HVBAR
+	beq	1f
+
 	push	{lr}
 	mrs	lr, SPSR
 	push	{lr}
@@ -378,7 +387,7 @@ THUMB(	orr	lr, #1)
 	pop	{lr}
 	msr	SPSR_csxf, lr
 	pop	{lr}
-	eret
+1:	eret
 
 guest_trap:
 	load_vcpu			@ Load VCPU pointer to r0
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 3b47c36e10ff..2c56012cb2d2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -694,6 +694,24 @@ __hyp_panic_str:
 
 	.align	2
 
+/*
+ * u64 kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C-way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed).  The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
+ * passed in r0 and r1.
+ *
+ * A function pointer with a value of 0 has a special meaning, and is
+ * used to implement __hyp_get_vectors in the same way as in
+ * arch/arm64/kernel/hyp_stub.S.
+ */
 ENTRY(kvm_call_hyp)
 	hvc	#0
 	ret
@@ -737,7 +755,12 @@ el1_sync:					// Guest trapped into EL2
 	pop	x2, x3
 	pop	x0, x1
 
-	push	lr, xzr
+	/* Check for __hyp_get_vectors */
+	cbnz	x0, 1f
+	mrs	x0, vbar_el2
+	b	2f
+
+1:	push	lr, xzr
 
 	/*
 	 * Compute the function address in EL2, and shuffle the parameters.
@@ -750,7 +773,7 @@ el1_sync:					// Guest trapped into EL2
 	blr	lr
 
 	pop	lr, xzr
-	eret
+2:	eret
 
 el1_trap:
 	/*

From fca920166b7c07b3932b2a8a24916e5002a57900 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 19:13:10 +0000
Subject: [PATCH 141/287] arm64: KVM: force cache clean on page fault when
 caches are off

In order for the guest with caches off to observe data written
contained in a given page, we need to make sure that page is
committed to memory, and not just hanging in the cache (as
guest accesses are completely bypassing the cache until it
decides to enable it).

For this purpose, hook into the coherent_icache_guest_page
function and flush the region if the guest SCTLR_EL1
register doesn't show the MMU  and caches as being enabled.
The function also get renamed to coherent_cache_guest_page.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2d58b733c87689d3d5144e4ac94ea861cc729145)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  4 ++--
 arch/arm/kvm/mmu.c               |  4 ++--
 arch/arm64/include/asm/kvm_mmu.h | 16 ++++++++++++----
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 2d122adcdb22..6d0f3d3023b7 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -116,8 +116,8 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
-					      unsigned long size)
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+					     unsigned long size)
 {
 	/*
 	 * If we are going to insert an instruction page and the icache is
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7789857d1470..fc71a8df0e13 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -715,7 +715,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
@@ -723,7 +723,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
 	}
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7f1f9408ff66..6eaf69b5e42c 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -106,7 +106,6 @@ static inline bool kvm_is_write_fault(unsigned long esr)
 	return true;
 }
 
-static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
@@ -124,9 +123,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
-					      unsigned long size)
+#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
+	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+					     unsigned long size)
+{
+	if (!vcpu_has_cache_enabled(vcpu))
+		kvm_flush_dcache_to_poc((void *)hva, size);
+
 	if (!icache_is_aliasing()) {		/* PIPT */
 		flush_icache_range(hva, hva + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
@@ -135,7 +144,6 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 	}
 }
 
-#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
 #endif /* __ASSEMBLY__ */

From 96d03922c8cb37a48451b02f95e80cd749266bbe Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Jan 2014 10:55:17 +0000
Subject: [PATCH 142/287] arm64: KVM: allows discrimination of AArch32 sysreg
 access

The current handling of AArch32 trapping is slightly less than
perfect, as it is not possible (from a handler point of view)
to distinguish it from an AArch64 access, nor to tell a 32bit
from a 64bit access either.

Fix this by introducing two additional flags:
- is_aarch32: true if the access was made in AArch32 mode
- is_32bit: true if is_aarch32 == true and a MCR/MRC instruction
  was used to perform the access (as opposed to MCRR/MRRC).

This allows a handler to cover all the possible conditions in which
a system register gets trapped.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 2072d29c46b73e39b3c6c56c6027af77086f45fd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 6 ++++++
 arch/arm64/kvm/sys_regs.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 02e9d09e1d80..bf03e0fadf1f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -437,6 +437,8 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 	int Rt2 = (hsr >> 10) & 0xf;
 
+	params.is_aarch32 = true;
+	params.is_32bit = false;
 	params.CRm = (hsr >> 1) & 0xf;
 	params.Rt = (hsr >> 5) & 0xf;
 	params.is_write = ((hsr & 1) == 0);
@@ -480,6 +482,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 
+	params.is_aarch32 = true;
+	params.is_32bit = true;
 	params.CRm = (hsr >> 1) & 0xf;
 	params.Rt  = (hsr >> 5) & 0xf;
 	params.is_write = ((hsr & 1) == 0);
@@ -549,6 +553,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct sys_reg_params params;
 	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
 
+	params.is_aarch32 = false;
+	params.is_32bit = false;
 	params.Op0 = (esr >> 20) & 3;
 	params.Op1 = (esr >> 14) & 0x7;
 	params.CRn = (esr >> 10) & 0xf;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index d50d3722998e..d411e251412c 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -30,6 +30,8 @@ struct sys_reg_params {
 	u8	Op2;
 	u8	Rt;
 	bool	is_write;
+	bool	is_aarch32;
+	bool	is_32bit;	/* Only valid if is_aarch32 is true */
 };
 
 struct sys_reg_desc {

From 8d8d8cd59448d8f2f9cba73d6cd5558ebe7d5a43 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 18:00:55 +0000
Subject: [PATCH 143/287] arm64: KVM: trap VM system registers until MMU and
 caches are ON

In order to be able to detect the point where the guest enables
its MMU and caches, trap all the VM related system registers.

Once we see the guest enabling both the MMU and the caches, we
can go back to a saner mode of operation, which is to leave these
registers in complete control of the guest.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4d44923b17bff283c002ed961373848284aaff1b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h |  3 +-
 arch/arm64/include/asm/kvm_asm.h |  3 +-
 arch/arm64/kvm/sys_regs.c        | 90 +++++++++++++++++++++++++++-----
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 0eb398655378..00fbaa75dc7b 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -62,6 +62,7 @@
  * RW:		64bit by default, can be overriden for 32bit VMs
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
  * TSW:		Trap cache operations by set/way
  * TWE:		Trap WFE
  * TWI:		Trap WFI
@@ -74,7 +75,7 @@
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
 			 HCR_AMO | HCR_IMO | HCR_FMO | \
 			 HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b25763bc0ec4..9fcd54b1e16d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -79,7 +79,8 @@
 #define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
 #define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
 #define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
-#define c10_AMAIR	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
 #define NR_CP15_REGS	(NR_SYS_REGS * 2)
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index bf03e0fadf1f..2097e5ecba42 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -120,6 +120,46 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct sys_reg_params *p,
+			  const struct sys_reg_desc *r)
+{
+	unsigned long val;
+
+	BUG_ON(!p->is_write);
+
+	val = *vcpu_reg(vcpu, p->Rt);
+	if (!p->is_aarch32) {
+		vcpu_sys_reg(vcpu, r->reg) = val;
+	} else {
+		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
+		if (!p->is_32bit)
+			vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
+	}
+	return true;
+}
+
+/*
+ * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set.  If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ */
+static bool access_sctlr(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	access_vm_reg(vcpu, p, r);
+
+	if (vcpu_has_cache_enabled(vcpu))	/* MMU+Caches enabled? */
+		vcpu->arch.hcr_el2 &= ~HCR_TVM;
+
+	return true;
+}
+
 /*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
@@ -185,32 +225,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_mpidr, MPIDR_EL1 },
 	/* SCTLR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
-	  NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+	  access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
 	/* CPACR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
 	  NULL, reset_val, CPACR_EL1, 0 },
 	/* TTBR0_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, TTBR0_EL1 },
+	  access_vm_reg, reset_unknown, TTBR0_EL1 },
 	/* TTBR1_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
-	  NULL, reset_unknown, TTBR1_EL1 },
+	  access_vm_reg, reset_unknown, TTBR1_EL1 },
 	/* TCR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
-	  NULL, reset_val, TCR_EL1, 0 },
+	  access_vm_reg, reset_val, TCR_EL1, 0 },
 
 	/* AFSR0_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
-	  NULL, reset_unknown, AFSR0_EL1 },
+	  access_vm_reg, reset_unknown, AFSR0_EL1 },
 	/* AFSR1_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
-	  NULL, reset_unknown, AFSR1_EL1 },
+	  access_vm_reg, reset_unknown, AFSR1_EL1 },
 	/* ESR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
-	  NULL, reset_unknown, ESR_EL1 },
+	  access_vm_reg, reset_unknown, ESR_EL1 },
 	/* FAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, FAR_EL1 },
+	  access_vm_reg, reset_unknown, FAR_EL1 },
 	/* PAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
 	  NULL, reset_unknown, PAR_EL1 },
@@ -224,17 +264,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* MAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
-	  NULL, reset_unknown, MAIR_EL1 },
+	  access_vm_reg, reset_unknown, MAIR_EL1 },
 	/* AMAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
-	  NULL, reset_amair_el1, AMAIR_EL1 },
+	  access_vm_reg, reset_amair_el1, AMAIR_EL1 },
 
 	/* VBAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, VBAR_EL1, 0 },
 	/* CONTEXTIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
-	  NULL, reset_val, CONTEXTIDR_EL1, 0 },
+	  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
 	/* TPIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
 	  NULL, reset_unknown, TPIDR_EL1 },
@@ -305,14 +345,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
-/* Trapped cp15 registers */
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
 static const struct sys_reg_desc cp15_regs[] = {
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+	{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
 	/*
 	 * DC{C,I,CI}SW operations:
 	 */
 	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
@@ -326,6 +384,14 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+
+	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 };
 
 /* Target specific emulation tables */

From 086ffbacac938e5d6208d286ffdf01867753a071 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 18 Feb 2014 14:29:03 +0000
Subject: [PATCH 144/287] ARM: KVM: introduce kvm_p*d_addr_end

The use of p*d_addr_end with stage-2 translation is slightly dodgy,
as the IPA is 40bits, while all the p*d_addr_end helpers are
taking an unsigned long (arm64 is fine with that as unligned long
is 64bit).

The fix is to introduce 64bit clean versions of the same helpers,
and use them in the stage-2 page table code.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit a3c8bd31af260a17d626514f636849ee1cd1f63e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   | 13 +++++++++++++
 arch/arm/kvm/mmu.c               | 10 +++++-----
 arch/arm64/include/asm/kvm_mmu.h |  4 ++++
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 6d0f3d3023b7..891afe78311a 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -114,6 +114,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= L_PMD_S2_RDWR;
 }
 
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+#define kvm_pgd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
+#define kvm_pud_addr_end(addr,end)		(end)
+
+#define kvm_pmd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
 struct kvm;
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index fc71a8df0e13..c1c08b240f35 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -145,7 +145,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
 		if (pud_none(*pud)) {
-			addr = pud_addr_end(addr, end);
+			addr = kvm_pud_addr_end(addr, end);
 			continue;
 		}
 
@@ -155,13 +155,13 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			 * move on.
 			 */
 			clear_pud_entry(kvm, pud, addr);
-			addr = pud_addr_end(addr, end);
+			addr = kvm_pud_addr_end(addr, end);
 			continue;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
-			addr = pmd_addr_end(addr, end);
+			addr = kvm_pmd_addr_end(addr, end);
 			continue;
 		}
 
@@ -176,10 +176,10 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		 */
 		if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
-			next = pmd_addr_end(addr, end);
+			next = kvm_pmd_addr_end(addr, end);
 			if (page_empty(pmd) && !page_empty(pud)) {
 				clear_pud_entry(kvm, pud, addr);
-				next = pud_addr_end(addr, end);
+				next = kvm_pud_addr_end(addr, end);
 			}
 		}
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6eaf69b5e42c..00c0cc8b8045 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -121,6 +121,10 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= PMD_S2_RDWR;
 }
 
+#define kvm_pgd_addr_end(addr, end)	pgd_addr_end(addr, end)
+#define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
+#define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))

From 5ac10a803ba46a03fccfbd078b60fa0cd83422c0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 15 Jan 2014 12:50:23 +0000
Subject: [PATCH 145/287] arm64: KVM: flush VM pages before letting the guest
 enable caches

When the guest runs with caches disabled (like in an early boot
sequence, for example), all the writes are diectly going to RAM,
bypassing the caches altogether.

Once the MMU and caches are enabled, whatever sits in the cache
becomes suddenly visible, which isn't what the guest expects.

A way to avoid this potential disaster is to invalidate the cache
when the MMU is being turned on. For this, we hook into the SCTLR_EL1
trapping code, and scan the stage-2 page tables, invalidating the
pages/sections that have already been mapped in.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9d218a1fcf4c6b759d442ef702842fae92e1ea61)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  2 +
 arch/arm/kvm/mmu.c               | 93 ++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/kvm_mmu.h |  2 +
 arch/arm64/kvm/sys_regs.c        |  5 +-
 4 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 891afe78311a..eb85b81eea6f 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -155,6 +155,8 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index c1c08b240f35..d7e998c6a08f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -187,6 +187,99 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	}
 }
 
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+			} else {
+				stage2_flush_ptes(kvm, pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+			} else {
+				stage2_flush_pmds(kvm, pud, addr, next);
+			}
+		}
+	} while (pud++, addr = next, addr != end);
+}
+
+static void stage2_flush_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+	phys_addr_t next;
+	pgd_t *pgd;
+
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		stage2_flush_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_flush_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * free_boot_hyp_pgd - free HYP boot page tables
  *
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 00c0cc8b8045..7d29847a893b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -150,5 +150,7 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 
 #define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2097e5ecba42..03244582bc55 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -27,6 +27,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -154,8 +155,10 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
 {
 	access_vm_reg(vcpu, p, r);
 
-	if (vcpu_has_cache_enabled(vcpu))	/* MMU+Caches enabled? */
+	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
 		vcpu->arch.hcr_el2 &= ~HCR_TVM;
+		stage2_flush_vm(vcpu->kvm);
+	}
 
 	return true;
 }

From ebdea56c8b2acd4fd7a421d1d670e6bf10f37e60 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 19:13:10 +0000
Subject: [PATCH 146/287] ARM: KVM: force cache clean on page fault when caches
 are off

In order for a guest with caches disabled to observe data written
contained in a given page, we need to make sure that page is
committed to memory, and not just hanging in the cache (as guest
accesses are completely bypassing the cache until it decides to
enable it).

For this purpose, hook into the coherent_cache_guest_page
function and flush the region if the guest SCTLR
register doesn't show the MMU and caches as being enabled.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 159793001d7d85af17855630c94f0a176848e16b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index eb85b81eea6f..5c7aa3c1519f 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -129,9 +129,19 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 
 struct kvm;
 
+#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+}
+
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 					     unsigned long size)
 {
+	if (!vcpu_has_cache_enabled(vcpu))
+		kvm_flush_dcache_to_poc((void *)hva, size);
+	
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -152,7 +162,6 @@ static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
 	}
 }
 
-#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 void stage2_flush_vm(struct kvm *kvm);

From d773d11dd453f43793082c5206799c818a4a4b55 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Jan 2014 18:56:26 +0000
Subject: [PATCH 147/287] ARM: KVM: fix handling of trapped 64bit coprocessor
 accesses

Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling)
changed the way we match the 64bit coprocessor access from
user space, but didn't update the trap handler for the same
set of registers.

The effect is that a trapped 64bit access is never matched, leading
to a fault being injected into the guest. This went unnoticed as we
didn't really trap any 64bit register so far.

Placing the CRm field of the access into the CRn field of the matching
structure fixes the problem. Also update the debug feature to emit the
expected string in case of failing match.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 46c214dd595381c880794413facadfa07fba5c95)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 4 ++--
 arch/arm/kvm/coproc.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 78c0885d6501..126c90d18387 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -443,7 +443,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct coproc_params params;
 
-	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+	params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
 	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
 	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
 	params.is_64bit = true;
@@ -451,7 +451,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
 	params.Op2 = 0;
 	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
-	params.CRn = 0;
+	params.CRm = 0;
 
 	return emulate_cp15(vcpu, &params);
 }
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 0461d5c8d3de..c5ad7ff40c96 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)
 {
 	/* Look, we even formatted it for you to paste into the table! */
 	if (p->is_64bit) {
-		kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
-			      p->CRm, p->Op1, p->is_write ? "write" : "read");
+		kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
+			      p->CRn, p->Op1, p->is_write ? "write" : "read");
 	} else {
 		kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
 			      " func_%s },\n",

From ba3c65fc739a98f9eae4e8997d8a6e66d130b0cf Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 21 Jan 2014 18:56:26 +0000
Subject: [PATCH 148/287] ARM: KVM: fix ordering of 64bit coprocessor accesses

Commit 240e99cbd00a (ARM: KVM: Fix 64-bit coprocessor handling)
added an ordering dependency for the 64bit registers.

The order described is: CRn, CRm, Op1, Op2, 64bit-first.

Unfortunately, the implementation is: CRn, 64bit-first, CRm...

Move the 64bit test to be last in order to match the documentation.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 547f781378a22b65c2ab468f235c23001b5924da)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index c5ad7ff40c96..8dda870e84f9 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 		return -1;
 	if (i1->CRn != i2->CRn)
 		return i1->CRn - i2->CRn;
-	if (i1->is_64 != i2->is_64)
-		return i2->is_64 - i1->is_64;
 	if (i1->CRm != i2->CRm)
 		return i1->CRm - i2->CRm;
 	if (i1->Op1 != i2->Op1)
 		return i1->Op1 - i2->Op1;
-	return i1->Op2 - i2->Op2;
+	if (i1->Op2 != i2->Op2)
+		return i1->Op2 - i2->Op2;
+	return i2->is_64 - i1->is_64;
 }
 
 

From ff860e2b7761196f6379888acd6668ecddb703df Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 22 Jan 2014 09:43:38 +0000
Subject: [PATCH 149/287] ARM: KVM: introduce per-vcpu HYP Configuration
 Register

So far, KVM/ARM used a fixed HCR configuration per guest, except for
the VI/VF/VA bits to control the interrupt in absence of VGIC.

With the upcoming need to dynamically reconfigure trapping, it becomes
necessary to allow the HCR to be changed on a per-vcpu basis.

The fix here is to mimic what KVM/arm64 already does: a per vcpu HCR
field, initialized at setup time.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit ac30a11e8e92a03dbe236b285c5cbae0bf563141)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h  | 1 -
 arch/arm/include/asm/kvm_host.h | 9 ++++++---
 arch/arm/kernel/asm-offsets.c   | 1 +
 arch/arm/kvm/guest.c            | 1 +
 arch/arm/kvm/interrupts_head.S  | 9 +++------
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 1d3153c7eb41..a843e74a384c 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -69,7 +69,6 @@
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
 			HCR_TWE | HCR_SWIO | HCR_TIDCP)
-#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
 
 /* System Control Register (SCTLR) bits */
 #define SCTLR_TE	(1 << 30)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 098f7dd6d564..09af14999c9b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -101,6 +101,12 @@ struct kvm_vcpu_arch {
 	/* The CPU type we expose to the VM */
 	u32 midr;
 
+	/* HYP trapping configuration */
+	u32 hcr;
+
+	/* Interrupt related fields */
+	u32 irq_lines;		/* IRQ and FIQ levels */
+
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
@@ -128,9 +134,6 @@ struct kvm_vcpu_arch {
 	/* IO related fields */
 	struct kvm_decode mmio_decode;
 
-	/* Interrupt related fields */
-	u32 irq_lines;		/* IRQ and FIQ levels */
-
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce6b48e..aa2acc1dd986 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -168,6 +168,7 @@ int main(void)
   DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
   DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
   DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+  DEFINE(VCPU_HCR,		offsetof(struct kvm_vcpu, arch.hcr));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
   DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 2786eae10c0d..b23a59c1c522 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 6f18695a09cb..a37270d7d4d6 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -597,17 +597,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
 .macro configure_hyp_role operation
-	mrc	p15, 4, r2, c1, c1, 0	@ HCR
-	bic	r2, r2, #HCR_VIRT_EXCP_MASK
-	ldr	r3, =HCR_GUEST_MASK
 	.if \operation == vmentry
-	orr	r2, r2, r3
+	ldr	r2, [vcpu, #VCPU_HCR]
 	ldr	r3, [vcpu, #VCPU_IRQ_LINES]
 	orr	r2, r2, r3
 	.else
-	bic	r2, r2, r3
+	mov	r2, #0
 	.endif
-	mcr	p15, 4, r2, c1, c1, 0
+	mcr	p15, 4, r2, c1, c1, 0	@ HCR
 .endm
 
 .macro load_vcpu

From 6076945ed741ff33724410d0bb9dfa535b475e79 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 22 Jan 2014 10:20:09 +0000
Subject: [PATCH 150/287] ARM: KVM: add world-switch for AMAIR{0,1}

HCR.TVM traps (among other things) accesses to AMAIR0 and AMAIR1.
In order to minimise the amount of surprise a guest could generate by
trying to access these registers with caches off, add them to the
list of registers we switch/handle.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit af20814ee927ed888288d98917a766b4179c4fe0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h |  4 +++-
 arch/arm/kvm/coproc.c          |  6 ++++++
 arch/arm/kvm/interrupts_head.S | 12 ++++++++++--
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 661da11f76f4..53b3c4a50d5c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -48,7 +48,9 @@
 #define c13_TID_URO	26	/* Thread ID, User R/O */
 #define c13_TID_PRIV	27	/* Thread ID, Privileged */
 #define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
-#define NR_CP15_REGS	29	/* Number of regs (incl. invalid) */
+#define c10_AMAIR0	29	/* Auxilary Memory Attribute Indirection Reg0 */
+#define c10_AMAIR1	30	/* Auxilary Memory Attribute Indirection Reg1 */
+#define NR_CP15_REGS	31	/* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET	  0
 #define ARM_EXCEPTION_UNDEFINED   1
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 126c90d18387..a5a54a48d51b 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -328,6 +328,12 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
 			NULL, reset_unknown, c10_NMRR},
 
+	/* AMAIR0/AMAIR1: swapped by interrupt.S. */
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR0},
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR1},
+
 	/* VBAR: swapped by interrupt.S. */
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c12_VBAR, 0x00000000 },
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index a37270d7d4d6..76af93025574 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -303,13 +303,17 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
 	mrrc	p15, 0, r4, r5, c7	@ PAR
+	mrc	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mrc	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \store_to_vcpu == 0
-	push	{r2,r4-r5}
+	push	{r2,r4-r7}
 	.else
 	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
 	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
 	strd	r4, r5, [r12]
+	str	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	str	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 .endm
 
@@ -322,15 +326,19 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  */
 .macro write_cp15_state read_from_vcpu
 	.if \read_from_vcpu == 0
-	pop	{r2,r4-r5}
+	pop	{r2,r4-r7}
 	.else
 	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
 	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
 	ldrd	r4, r5, [r12]
+	ldr	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	ldr	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 
 	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
 	mcrr	p15, 0, r4, r5, c7	@ PAR
+	mcr	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mcr	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \read_from_vcpu == 0
 	pop	{r2-r12}

From 11178dce0398cd8419c639c8ab81357cb4f2bbf0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 14 Jan 2014 18:00:55 +0000
Subject: [PATCH 151/287] ARM: KVM: trap VM system registers until MMU and
 caches are ON

In order to be able to detect the point where the guest enables
its MMU and caches, trap all the VM related system registers.

Once we see the guest enabling both the MMU and the caches, we
can go back to a saner mode of operation, which is to leave these
registers in complete control of the guest.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 8034699a42d68043b495c7e0cfafccd920707ec8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_arm.h |  3 +-
 arch/arm/kvm/coproc.c          | 74 ++++++++++++++++++++++++++--------
 arch/arm/kvm/coproc.h          |  4 ++
 arch/arm/kvm/coproc_a15.c      |  2 +-
 arch/arm/kvm/coproc_a7.c       |  2 +-
 5 files changed, 66 insertions(+), 19 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index a843e74a384c..816db0bf2dd8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -55,6 +55,7 @@
  * The bits we set in HCR:
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until MMU and caches are on)
  * TSW:		Trap cache operations by set/way
  * TWI:		Trap WFI
  * TWE:		Trap WFE
@@ -68,7 +69,7 @@
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
-			HCR_TWE | HCR_SWIO | HCR_TIDCP)
+			HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
 
 /* System Control Register (SCTLR) bits */
 #define SCTLR_TE	(1 << 30)
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index a5a54a48d51b..c58a35116f63 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -23,6 +23,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -204,6 +205,44 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	BUG_ON(!p->is_write);
+
+	vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
+	if (p->is_64bit)
+		vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
+
+	return true;
+}
+
+/*
+ * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
+ */
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r)
+{
+	access_vm_reg(vcpu, p, r);
+
+	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
+		vcpu->arch.hcr &= ~HCR_TVM;
+		stage2_flush_vm(vcpu->kvm);
+	}
+
+	return true;
+}
+
 /*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
@@ -261,33 +300,36 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_val, c1_CPACR, 0x00000000 },
 
-	/* TTBR0/TTBR1: swapped by interrupt.S. */
-	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
-	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
-
-	/* TTBCR: swapped by interrupt.S. */
+	/* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
+	{ CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c2_TTBR1 },
 	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_val, c2_TTBCR, 0x00000000 },
+			access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
+	{ CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
+
 
 	/* DACR: swapped by interrupt.S. */
 	{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c3_DACR },
+			access_vm_reg, reset_unknown, c3_DACR },
 
 	/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_DFSR },
+			access_vm_reg, reset_unknown, c5_DFSR },
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_IFSR },
+			access_vm_reg, reset_unknown, c5_IFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_ADFSR },
+			access_vm_reg, reset_unknown, c5_ADFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_AIFSR },
+			access_vm_reg, reset_unknown, c5_AIFSR },
 
 	/* DFAR/IFAR: swapped by interrupt.S. */
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c6_DFAR },
+			access_vm_reg, reset_unknown, c6_DFAR },
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_unknown, c6_IFAR },
+			access_vm_reg, reset_unknown, c6_IFAR },
 
 	/* PAR swapped by interrupt.S */
 	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
@@ -324,9 +366,9 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c10_PRRR},
+			access_vm_reg, reset_unknown, c10_PRRR},
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c10_NMRR},
+			access_vm_reg, reset_unknown, c10_NMRR},
 
 	/* AMAIR0/AMAIR1: swapped by interrupt.S. */
 	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
@@ -340,7 +382,7 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_val, c13_CID, 0x00000000 },
+			access_vm_reg, reset_val, c13_CID, 0x00000000 },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_unknown, c13_TID_URW },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 8dda870e84f9..1a44bbe39643 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 #define is64		.is_64 = true
 #define is32		.is_64 = false
 
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r);
+
 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index bb0cac1410cc..e6f4ae48bda9 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
 static const struct coproc_reg a15_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_val, c1_SCTLR, 0x00C50078 },
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 1df767331588..17fc7cd479d3 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
 static const struct coproc_reg a7_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_val, c1_SCTLR, 0x00C50878 },
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
 };
 
 static struct kvm_coproc_target_table a7_target_table = {

From 0ddf276ba9c11c5bd993c7c87226dde6fc5ffa51 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 30 Jan 2014 17:38:33 +0000
Subject: [PATCH 152/287] ARM: KVM: fix warning in mmu.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compiling with THP enabled leads to the following warning:

arch/arm/kvm/mmu.c: In function ‘unmap_range’:
arch/arm/kvm/mmu.c:177:39: warning: ‘pte’ may be used uninitialized in this function [-Wmaybe-uninitialized]
   if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
                                        ^
Code inspection reveals that these two cases are mutually exclusive,
so GCC is a bit overzealous here. Silence it anyway by initializing
pte to NULL and testing it later on.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 56041bf920d2937b7cadcb30cb206f0372eee814)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index d7e998c6a08f..80bb1e6c2c29 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -144,6 +144,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	while (addr < end) {
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
+		pte = NULL;
 		if (pud_none(*pud)) {
 			addr = kvm_pud_addr_end(addr, end);
 			continue;
@@ -174,7 +175,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		/*
 		 * If the pmd entry is to be cleared, walk back up the ladder
 		 */
-		if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
+		if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
 			clear_pmd_entry(kvm, pmd, addr);
 			next = kvm_pmd_addr_end(addr, end);
 			if (page_empty(pmd) && !page_empty(pud)) {

From 500d9e60bb1f98d24a2780542a68d6a1ffc32315 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 6 Mar 2014 03:30:46 +0000
Subject: [PATCH 153/287] ARM: KVM: fix non-VGIC compilation

Add a stub for kvm_vgic_addr when compiling without
CONFIG_KVM_ARM_VGIC. The usefulness of this configurarion is extremely
doubtful, but let's fix it anyway (until we decide that we'll always
support a VGIC).

Reported-by: Michele Paolino <m.paolino@virtualopensystems.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6cbde8253a8143ada18ec0d1711230747a7c1934)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index be85127bfed3..f27000f55a83 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -171,6 +171,11 @@ static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 add
 	return 0;
 }
 
+static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+{
+	return -ENXIO;
+}
+
 static inline int kvm_vgic_init(struct kvm *kvm)
 {
 	return 0;

From 1fd8c219ef68ef56a4a0b0effa66c38e409697eb Mon Sep 17 00:00:00 2001
From: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Date: Fri, 7 Mar 2014 08:49:25 +0000
Subject: [PATCH 154/287] arm64: Add boot time configuration of Intermediate
 Physical Address size

ARMv8 supports a range of physical address bit sizes. The PARange bits
from ID_AA64MMFR0_EL1 register are read during boot-time and the
intermediate physical address size bits are written in the translation
control registers (TCR_EL1 and VTCR_EL2).

There is no change in the VA bits and levels of translation.

Signed-off-by: Radha Mohan Chintakuntla <rchintakuntla@cavium.com>
Reviewed-by: Will Deacon <Will.deacon@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 87366d8cf7b3f6dc34633938aa8766e5a390ce33)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h       | 15 ++++++---------
 arch/arm64/include/asm/pgtable-hwdef.h |  5 ++---
 arch/arm64/kvm/hyp-init.S              |  6 ++++++
 arch/arm64/mm/proc.S                   |  8 +++++++-
 4 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 00fbaa75dc7b..3d6903006a8a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -107,7 +107,6 @@
 
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_PS_MASK	(7 << 16)
-#define VTCR_EL2_PS_40B		(2 << 16)
 #define VTCR_EL2_TG0_MASK	(1 << 14)
 #define VTCR_EL2_TG0_4K		(0 << 14)
 #define VTCR_EL2_TG0_64K	(1 << 14)
@@ -130,10 +129,9 @@
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
-				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
-				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
-				 VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
 #define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
 #else
 /*
@@ -143,10 +141,9 @@
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
-				 VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
-				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
-				 VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
 #define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
 #endif
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index b1d2e26c3c88..f7af66b54cb2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -100,9 +100,9 @@
 #define PTE_HYP			PTE_USER
 
 /*
- * 40-bit physical address supported.
+ * Highest possible physical address supported.
  */
-#define PHYS_MASK_SHIFT		(40)
+#define PHYS_MASK_SHIFT		(48)
 #define PHYS_MASK		((UL(1) << PHYS_MASK_SHIFT) - 1)
 
 /*
@@ -122,7 +122,6 @@
 #define TCR_SHARED		((UL(3) << 12) | (UL(3) << 28))
 #define TCR_TG0_64K		(UL(1) << 14)
 #define TCR_TG1_64K		(UL(1) << 30)
-#define TCR_IPS_40BIT		(UL(2) << 32)
 #define TCR_ASID16		(UL(1) << 36)
 #define TCR_TBI0		(UL(1) << 37)
 
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 2b0244d65c16..d968796f4b2d 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -68,6 +68,12 @@ __do_hyp_init:
 	msr	tcr_el2, x4
 
 	ldr	x4, =VTCR_EL2_FLAGS
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+	 * VTCR_EL2.
+	 */
+	mrs	x5, ID_AA64MMFR0_EL1
+	bfi	x4, x5, #16, #3
 	msr	vtcr_el2, x4
 
 	mrs	x4, mair_el1
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 8e0158f198d7..55a40f6dbf78 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -140,8 +140,14 @@ ENTRY(__cpu_setup)
 	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
 	 * both user and kernel.
 	 */
-	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \
+	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \
 		      TCR_ASID16 | TCR_TBI0 | (1 << 31)
+	/*
+	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+	 * TCR_EL1.
+	 */
+	mrs	x9, ID_AA64MMFR0_EL1
+	bfi	x10, x9, #32, #3
 #ifdef CONFIG_ARM64_64K_PAGES
 	orr	x10, x10, TCR_TG0_64K
 	orr	x10, x10, TCR_TG1_64K

From 7fe3c71dbc4c635cc5e4b527b748749fc771eb57 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Mon, 7 Apr 2014 01:36:08 +0800
Subject: [PATCH 155/287] arm, kvm: fix double lock on cpu_add_remove_lock

Commit 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration)
holds the lock before calling the two functions:

	kvm_vgic_hyp_init()
	kvm_timer_hyp_init()

and both the two functions are calling register_cpu_notifier()
to register cpu notifier, so cause double lock on cpu_add_remove_lock.

Considered that both two functions are only called inside
kvm_arch_init() with holding cpu_add_remove_lock, so simply use
__register_cpu_notifier() to fix the problem.

Fixes: 8146875de7d4 (arm, kvm: Fix CPU hotplug callback registration)
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 2 +-
 virt/kvm/arm/vgic.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 5081e809821f..22fa819a9b6a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void)
 
 	host_vtimer_irq = ppi;
 
-	err = register_cpu_notifier(&kvm_timer_cpu_nb);
+	err = __register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {
 		kvm_err("Cannot register timer CPU notifier\n");
 		goto out_free;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8ca405cd7c1a..47b29834a6b6 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1496,7 +1496,7 @@ int kvm_vgic_hyp_init(void)
 		goto out;
 	}
 
-	ret = register_cpu_notifier(&vgic_cpu_nb);
+	ret = __register_cpu_notifier(&vgic_cpu_nb);
 	if (ret) {
 		kvm_err("Cannot register vgic CPU notifier\n");
 		goto out_free_irq;

From 9b937709747526fc1fcac118d605705f78424d3c Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:09:21 +0100
Subject: [PATCH 156/287] KVM: add kvm_is_error_gpa() helper

It's quite common (in the s390 guest access code) to test if a guest
physical address points to a valid guest memory area or not.
So add a simple helper function in common code, since this might be
of interest for other architectures as well.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit dfeec843fb237d73947e818f961e8d6f0df22b01)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f6801d10e04c..eef946f92c0c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -840,6 +840,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
+static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+{
+	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+	return kvm_is_error_hva(hva);
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);

From d80e22fea4940bd2ad63648a941620b6fccd59a3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 25 Apr 2014 11:46:04 +0100
Subject: [PATCH 157/287] ARM: KVM: disable KVM in Kconfig on big-endian
 systems

KVM currently crashes and burns on big-endian hosts, so don't allow it
to be selected until we've got that fixed.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4e4468fac4381b92eb333d94256e7fb8350f3de3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 466bd299b1a8..4be5bb150bdd 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -23,7 +23,7 @@ config KVM
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
-	depends on ARM_VIRT_EXT && ARM_LPAE
+	depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN
 	---help---
 	  Support hosting virtualized guest machines. You will also
 	  need to select one or more of the processor modules below.

From 16b80f7131f5d763118811815439c82e4ac45e50 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Fri, 28 Mar 2014 14:25:19 +0000
Subject: [PATCH 158/287] arm: KVM: fix possible misalignment of PGDs and
 bounce page

The kvm/mmu code shared by arm and arm64 uses kalloc() to allocate
a bounce page (if hypervisor init code crosses page boundary) and
hypervisor PGDs. The problem is that kalloc() does not guarantee
the proper alignment. In the case of the bounce page, the page sized
buffer allocated may also cross a page boundary negating the purpose
and leading to a hang during kvm initialization. Likewise the PGDs
allocated may not meet the minimum alignment requirements of the
underlying MMU. This patch uses __get_free_page() to guarantee the
worst case alignment needs of the bounce page and PGDs on both arm
and arm64.

Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Mark Salter <msalter@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 5d4e08c45a6cf8f1ab3c7fa375007635ac569165)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 80bb1e6c2c29..16f804938b8f 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,6 +42,8 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+
 #define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
@@ -293,14 +295,14 @@ void free_boot_hyp_pgd(void)
 	if (boot_hyp_pgd) {
 		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-		kfree(boot_hyp_pgd);
+		free_pages((unsigned long)boot_hyp_pgd, pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd)
 		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
-	kfree(init_bounce_page);
+	free_page((unsigned long)init_bounce_page);
 	init_bounce_page = NULL;
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -330,7 +332,7 @@ void free_hyp_pgds(void)
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
 			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-		kfree(hyp_pgd);
+		free_pages((unsigned long)hyp_pgd, pgd_order);
 		hyp_pgd = NULL;
 	}
 
@@ -1024,7 +1026,7 @@ int kvm_mmu_init(void)
 		size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
 		phys_addr_t phys_base;
 
-		init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
 		if (!init_bounce_page) {
 			kvm_err("Couldn't allocate HYP init bounce page\n");
 			err = -ENOMEM;
@@ -1050,8 +1052,9 @@ int kvm_mmu_init(void)
 			 (unsigned long)phys_base);
 	}
 
-	hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
-	boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+	boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+
 	if (!hyp_pgd || !boot_hyp_pgd) {
 		kvm_err("Hyp mode PGD not allocated\n");
 		err = -ENOMEM;

From 738a77ae39541495f4d5396ed093bccdebc589f1 Mon Sep 17 00:00:00 2001
From: Haibin Wang <wanghaibin.wang@huawei.com>
Date: Thu, 10 Apr 2014 13:14:32 +0100
Subject: [PATCH 159/287] KVM: ARM: vgic: Fix sgi dispatch problem

When dispatch SGI(mode == 0), that is the vcpu of VM should send
sgi to the cpu which the target_cpus list.
So, there must add the "break" to branch of case 0.

Cc: <stable@vger.kernel.org> # 3.10+
Signed-off-by: Haibin Wang <wanghaibin.wang@huawei.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 91021a6c8ffdc55804dab5acdfc7de4f278b9ac3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 47b29834a6b6..7e8b44efb739 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -916,6 +916,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	case 0:
 		if (!target_cpus)
 			return;
+		break;
 
 	case 1:
 		target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;

From 6e349e33cec7724f9ae0520a1539ab7cf1d24893 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Apr 2014 15:26:01 +0200
Subject: [PATCH 160/287] KVM: async_pf: mm->mm_users can not pin apf->mm

get_user_pages(mm) is simply wrong if mm->mm_users == 0 and exit_mmap/etc
was already called (or is in progress), mm->mm_count can only pin mm->pgd
and mm_struct itself.

Change kvm_setup_async_pf/async_pf_execute to inc/dec mm->mm_users.

kvm_create_vm/kvm_destroy_vm play with ->mm_count too but this case looks
fine at first glance, it seems that this ->mm is only used to verify that
current->mm == kvm->mm.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 41c22f626254b9dc0376928cae009e73d1b6a49a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 10df100c4514..06e6401d6ef4 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -101,7 +101,7 @@ static void async_pf_execute(struct work_struct *work)
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-	mmdrop(mm);
+	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
 }
 
@@ -118,7 +118,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 		flush_work(&work->work);
 #else
 		if (cancel_work_sync(&work->work)) {
-			mmdrop(work->mm);
+			mmput(work->mm);
 			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
 			kmem_cache_free(async_pf_cache, work);
 		}
@@ -183,7 +183,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	work->addr = hva;
 	work->arch = *arch;
 	work->mm = current->mm;
-	atomic_inc(&work->mm->mm_count);
+	atomic_inc(&work->mm->mm_users);
 	kvm_get_kvm(work->vcpu->kvm);
 
 	/* this can't really happen otherwise gfn_to_pfn_async
@@ -201,7 +201,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
 	return 1;
 retry_sync:
 	kvm_put_kvm(work->vcpu->kvm);
-	mmdrop(work->mm);
+	mmput(work->mm);
 	kmem_cache_free(async_pf_cache, work);
 	return 0;
 }

From bc38de2312a2a078f2d5ad596888a131d811b143 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Fri, 11 Apr 2014 00:07:18 +0200
Subject: [PATCH 161/287] KVM: arm/arm64: vgic: fix GICD_ICFGR register
 accesses

Since KVM internally represents the ICFGR registers by stuffing two
of them into one word, the offset for accessing the internal
representation and the one for the MMIO based access are different.
So keep the original offset around, but adjust the internal array
offset by one bit.

Reported-by: Haibin Wang <wanghaibin.wang@huawei.com>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit f2ae85b2ab3776b9e4e42e5b6fa090f40d396794)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7e8b44efb739..f9af48c9eb37 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 	u32 val;
 	u32 *reg;
 
-	offset >>= 1;
 	reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
-				  vcpu->vcpu_id, offset);
+				  vcpu->vcpu_id, offset >> 1);
 
-	if (offset & 2)
+	if (offset & 4)
 		val = *reg >> 16;
 	else
 		val = *reg & 0xffff;
@@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
 	vgic_reg_access(mmio, &val, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 	if (mmio->is_write) {
-		if (offset < 4) {
+		if (offset < 8) {
 			*reg = ~0U; /* Force PPIs/SGIs to 1 */
 			return false;
 		}
 
 		val = vgic_cfg_compress(val);
-		if (offset & 2) {
+		if (offset & 4) {
 			*reg &= 0xffff;
 			*reg |= val << 16;
 		} else {

From 2a1992b43b4e9f453e77e62740181ef665426c93 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 21 Apr 2014 15:25:58 +0200
Subject: [PATCH 162/287] KVM: async_pf: kill the unnecessary use_mm/unuse_mm
 async_pf_execute()

async_pf_execute() has no reasons to adopt apf->mm, gup(current, mm)
should work just fine even if current has another or NULL ->mm.

Recently kvm_async_page_present_sync() was added insedie the "use_mm"
section, but it seems that it doesn't need current->mm too.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d72d946d0b649b79709b99b9d5cb7269fff8afaa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 06e6401d6ef4..cda703e512d3 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,12 +80,10 @@ static void async_pf_execute(struct work_struct *work)
 
 	might_sleep();
 
-	use_mm(mm);
 	down_read(&mm->mmap_sem);
 	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	kvm_async_page_present_sync(vcpu, apf);
-	unuse_mm(mm);
 
 	spin_lock(&vcpu->async_pf.lock);
 	list_add_tail(&apf->link, &vcpu->async_pf.done);

From 4c158ade8e722cd59ba1a6d8ca46be8427c8ac90 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 28 Apr 2014 17:03:00 +0200
Subject: [PATCH 163/287] KVM: async_pf: change async_pf_execute() to use
 get_user_pages(tsk => NULL)

async_pf_execute() passes tsk == current to gup(), this is doesn't
hurt but unnecessary and misleading. "tsk" is only used to account
the number of faults and current is the random workqueue thread.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e9545b9f8aeb63e05818e4b3250057260bc072aa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/async_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index cda703e512d3..d6a3d0993d88 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -81,7 +81,7 @@ static void async_pf_execute(struct work_struct *work)
 	might_sleep();
 
 	down_read(&mm->mmap_sem);
-	get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
+	get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
 	up_read(&mm->mmap_sem);
 	kvm_async_page_present_sync(vcpu, apf);
 

From de055bd5eb6f2daf69c81f6f62241f0b977a2ae4 Mon Sep 17 00:00:00 2001
From: Haibin Wang <wanghaibin.wang@huawei.com>
Date: Tue, 29 Apr 2014 14:49:17 +0800
Subject: [PATCH 164/287] KVM: ARM: vgic: Fix the overlap check action about
 setting the GICD & GICC base address.

Currently below check in vgic_ioaddr_overlap will always succeed,
because the vgic dist base and vgic cpu base are still kept UNDEF
after initialization. The code as follows will be return forever.

	if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
                return 0;

So, before invoking the vgic_ioaddr_overlap, it needs to set the
corresponding base address firstly.

Signed-off-by: Haibin Wang <wanghaibin.wang@huawei.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 30c2117085bc4e05d091cee6eba79f069b41a9cd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index f9af48c9eb37..56ff9bebb577 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
 	if (addr + size < addr)
 		return -EINVAL;
 
+	*ioaddr = addr;
 	ret = vgic_ioaddr_overlap(kvm);
 	if (ret)
-		return ret;
-	*ioaddr = addr;
+		*ioaddr = VGIC_ADDR_UNDEF;
+
 	return ret;
 }
 

From 7d46aca1cf97c1ff323d799fd82ffd49bc294bd1 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:14 +0530
Subject: [PATCH 165/287] KVM: Add capability to advertise PSCI v0.2 support

User space (i.e. QEMU or KVMTOOL) should be able to check whether KVM
ARM/ARM64 supports in-kernel PSCI v0.2 emulation. For this purpose, we
define KVM_CAP_ARM_PSCI_0_2 in KVM user space interface header.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 717abd208dff75b343243aa5ed688f62190dda5e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 56347ab54cf7..cb7ebcc7a9db 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -669,6 +669,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_IRQ_XICS 92
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_ARM_PSCI_0_2 102
 
 #ifdef KVM_CAP_IRQ_ROUTING
 

From 041d3f918f8daf3715fc87f1516c40466f94e6dd Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:15 +0530
Subject: [PATCH 166/287] ARM/ARM64: KVM: Add common header for PSCI related
 defines

We need a common place to share PSCI related defines among ARM kernel,
ARM64 kernel, KVM ARM/ARM64 PSCI emulation, and user space.

We introduce uapi/linux/psci.h for this purpose. This newly added
header will be first used by KVM ARM/ARM64 in-kernel PSCI emulation
and user space (i.e. QEMU or KVMTOOL).

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Ashwin Chaugule <ashwin.chaugule@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e546eea74ec66698e29c583639cf6e2a11e46490)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/Kbuild |  1 +
 include/uapi/linux/psci.h | 90 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 include/uapi/linux/psci.h

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index bdc6e87ff3eb..405887bec8b3 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -311,6 +311,7 @@ header-y += ppp-ioctl.h
 header-y += ppp_defs.h
 header-y += pps.h
 header-y += prctl.h
+header-y += psci.h
 header-y += ptp_clock.h
 header-y += ptrace.h
 header-y += qnx4_fs.h
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
new file mode 100644
index 000000000000..310d83e0a91b
--- /dev/null
+++ b/include/uapi/linux/psci.h
@@ -0,0 +1,90 @@
+/*
+ * ARM Power State and Coordination Interface (PSCI) header
+ *
+ * This header holds common PSCI defines and macros shared
+ * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Anup Patel <anup.patel@linaro.org>
+ */
+
+#ifndef _UAPI_LINUX_PSCI_H
+#define _UAPI_LINUX_PSCI_H
+
+/*
+ * PSCI v0.1 interface
+ *
+ * The PSCI v0.1 function numbers are implementation defined.
+ *
+ * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
+ * INVALID_PARAMS, and DENIED defined below are applicable
+ * to PSCI v0.1.
+ */
+
+/* PSCI v0.2 interface */
+#define PSCI_0_2_FN_BASE			0x84000000
+#define PSCI_0_2_FN(n)				(PSCI_0_2_FN_BASE + (n))
+#define PSCI_0_2_64BIT				0x40000000
+#define PSCI_0_2_FN64_BASE			\
+					(PSCI_0_2_FN_BASE + PSCI_0_2_64BIT)
+#define PSCI_0_2_FN64(n)			(PSCI_0_2_FN64_BASE + (n))
+
+#define PSCI_0_2_FN_PSCI_VERSION		PSCI_0_2_FN(0)
+#define PSCI_0_2_FN_CPU_SUSPEND			PSCI_0_2_FN(1)
+#define PSCI_0_2_FN_CPU_OFF			PSCI_0_2_FN(2)
+#define PSCI_0_2_FN_CPU_ON			PSCI_0_2_FN(3)
+#define PSCI_0_2_FN_AFFINITY_INFO		PSCI_0_2_FN(4)
+#define PSCI_0_2_FN_MIGRATE			PSCI_0_2_FN(5)
+#define PSCI_0_2_FN_MIGRATE_INFO_TYPE		PSCI_0_2_FN(6)
+#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU		PSCI_0_2_FN(7)
+#define PSCI_0_2_FN_SYSTEM_OFF			PSCI_0_2_FN(8)
+#define PSCI_0_2_FN_SYSTEM_RESET		PSCI_0_2_FN(9)
+
+#define PSCI_0_2_FN64_CPU_SUSPEND		PSCI_0_2_FN64(1)
+#define PSCI_0_2_FN64_CPU_ON			PSCI_0_2_FN64(3)
+#define PSCI_0_2_FN64_AFFINITY_INFO		PSCI_0_2_FN64(4)
+#define PSCI_0_2_FN64_MIGRATE			PSCI_0_2_FN64(5)
+#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU	PSCI_0_2_FN64(7)
+
+/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
+#define PSCI_0_2_POWER_STATE_ID_MASK		0xffff
+#define PSCI_0_2_POWER_STATE_ID_SHIFT		0
+#define PSCI_0_2_POWER_STATE_TYPE_SHIFT		16
+#define PSCI_0_2_POWER_STATE_TYPE_MASK		\
+				(0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+#define PSCI_0_2_POWER_STATE_AFFL_SHIFT		24
+#define PSCI_0_2_POWER_STATE_AFFL_MASK		\
+				(0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+
+/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */
+#define PSCI_0_2_AFFINITY_LEVEL_ON		0
+#define PSCI_0_2_AFFINITY_LEVEL_OFF		1
+#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING	2
+
+/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */
+#define PSCI_0_2_TOS_UP_MIGRATE			0
+#define PSCI_0_2_TOS_UP_NO_MIGRATE		1
+#define PSCI_0_2_TOS_MP				2
+
+/* PSCI version decoding (independent of PSCI version) */
+#define PSCI_VERSION_MAJOR_SHIFT		16
+#define PSCI_VERSION_MINOR_MASK			\
+		((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
+#define PSCI_VERSION_MAJOR_MASK			~PSCI_VERSION_MINOR_MASK
+#define PSCI_VERSION_MAJOR(ver)			\
+		(((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
+#define PSCI_VERSION_MINOR(ver)			\
+		((ver) & PSCI_VERSION_MINOR_MASK)
+
+/* PSCI return values (inclusive of all PSCI versions) */
+#define PSCI_RET_SUCCESS			0
+#define PSCI_RET_NOT_SUPPORTED			-1
+#define PSCI_RET_INVALID_PARAMS			-2
+#define PSCI_RET_DENIED				-3
+#define PSCI_RET_ALREADY_ON			-4
+#define PSCI_RET_ON_PENDING			-5
+#define PSCI_RET_INTERNAL_FAILURE		-6
+#define PSCI_RET_NOT_PRESENT			-7
+#define PSCI_RET_DISABLED			-8
+
+#endif /* _UAPI_LINUX_PSCI_H */

From 4ccf6abe18fc14863159b2c51a4958214b4c13dc Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:16 +0530
Subject: [PATCH 167/287] ARM/ARM64: KVM: Add base for PSCI v0.2 emulation

Currently, the in-kernel PSCI emulation provides PSCI v0.1 interface to
VCPUs. This patch extends current in-kernel PSCI emulation to provide
PSCI v0.2 interface to VCPUs.

By default, ARM/ARM64 KVM will always provide PSCI v0.1 interface for
keeping the ABI backward-compatible.

To select PSCI v0.2 interface for VCPUs, the user space (i.e. QEMU or
KVMTOOL) will have to set KVM_ARM_VCPU_PSCI_0_2 feature when doing VCPU
init using KVM_ARM_VCPU_INIT ioctl.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7d0f84aae9e231930985eaff63ac91b61aaa15d6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |   2 +-
 arch/arm/include/asm/kvm_psci.h   |   4 ++
 arch/arm/include/uapi/asm/kvm.h   |  10 +--
 arch/arm/kvm/psci.c               | 105 +++++++++++++++++++++++-------
 arch/arm64/include/asm/kvm_host.h |   2 +-
 arch/arm64/include/asm/kvm_psci.h |   4 ++
 arch/arm64/include/uapi/asm/kvm.h |  10 +--
 7 files changed, 105 insertions(+), 32 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 09af14999c9b..193ceaf01bfd 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -36,7 +36,7 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #define KVM_HAVE_ONE_REG
 
-#define KVM_VCPU_MAX_FEATURES 1
+#define KVM_VCPU_MAX_FEATURES 2
 
 #include <kvm/arm_vgic.h>
 
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98bf170..4c0e3e1d1597 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
 #ifndef __ARM_KVM_PSCI_H__
 #define __ARM_KVM_PSCI_H__
 
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
 bool kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index ef0c8785ba16..e6ebdd3471e5 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -20,6 +20,7 @@
 #define __ARM_KVM_H__
 
 #include <linux/types.h>
+#include <linux/psci.h>
 #include <asm/ptrace.h>
 
 #define __KVM_HAVE_GUEST_DEBUG
@@ -83,6 +84,7 @@ struct kvm_regs {
 #define KVM_VGIC_V2_CPU_SIZE		0x2000
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2		1 /* CPU uses PSCI v0.2 */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -201,9 +203,9 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 
-#define KVM_PSCI_RET_SUCCESS		0
-#define KVM_PSCI_RET_NI			((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
 
 #endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 448f60e8d23c..8c42596cdbdf 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -59,7 +59,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * turned off.
 	 */
 	if (!vcpu || !vcpu->arch.pause)
-		return KVM_PSCI_RET_INVAL;
+		return PSCI_RET_INVALID_PARAMS;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
@@ -82,7 +82,82 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
-	return KVM_PSCI_RET_SUCCESS;
+	return PSCI_RET_SUCCESS;
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+		return KVM_ARM_PSCI_0_2;
+
+	return KVM_ARM_PSCI_0_1;
+}
+
+static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case PSCI_0_2_FN_PSCI_VERSION:
+		/*
+		 * Bits[31:16] = Major Version = 0
+		 * Bits[15:0] = Minor Version = 2
+		 */
+		val = 2;
+		break;
+	case PSCI_0_2_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case PSCI_0_2_FN_CPU_ON:
+	case PSCI_0_2_FN64_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN_SYSTEM_OFF:
+	case PSCI_0_2_FN_SYSTEM_RESET:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+	case PSCI_0_2_FN64_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	default:
+		return false;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return true;
+}
+
+static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
+{
+	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+	unsigned long val;
+
+	switch (psci_fn) {
+	case KVM_PSCI_FN_CPU_OFF:
+		kvm_psci_vcpu_off(vcpu);
+		val = PSCI_RET_SUCCESS;
+		break;
+	case KVM_PSCI_FN_CPU_ON:
+		val = kvm_psci_vcpu_on(vcpu);
+		break;
+	case KVM_PSCI_FN_CPU_SUSPEND:
+	case KVM_PSCI_FN_MIGRATE:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	default:
+		return false;
+	}
+
+	*vcpu_reg(vcpu, 0) = val;
+	return true;
 }
 
 /**
@@ -97,26 +172,12 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
  */
 bool kvm_psci_call(struct kvm_vcpu *vcpu)
 {
-	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
-	unsigned long val;
-
-	switch (psci_fn) {
-	case KVM_PSCI_FN_CPU_OFF:
-		kvm_psci_vcpu_off(vcpu);
-		val = KVM_PSCI_RET_SUCCESS;
-		break;
-	case KVM_PSCI_FN_CPU_ON:
-		val = kvm_psci_vcpu_on(vcpu);
-		break;
-	case KVM_PSCI_FN_CPU_SUSPEND:
-	case KVM_PSCI_FN_MIGRATE:
-		val = KVM_PSCI_RET_NI;
-		break;
-
+	switch (kvm_psci_version(vcpu)) {
+	case KVM_ARM_PSCI_0_2:
+		return kvm_psci_0_2_call(vcpu);
+	case KVM_ARM_PSCI_0_1:
+		return kvm_psci_0_1_call(vcpu);
 	default:
 		return false;
-	}
-
-	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	};
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0a1d69751562..92242ce06309 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -39,7 +39,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
-#define KVM_VCPU_MAX_FEATURES 2
+#define KVM_VCPU_MAX_FEATURES 3
 
 struct kvm_vcpu;
 int kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e301a4816355..e25c658a757b 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
 #ifndef __ARM64_KVM_PSCI_H__
 #define __ARM64_KVM_PSCI_H__
 
+#define KVM_ARM_PSCI_0_1	1
+#define KVM_ARM_PSCI_0_2	2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
 bool kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index eaf54a30bedc..e6471daf3fb5 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -31,6 +31,7 @@
 #define KVM_NR_SPSR	5
 
 #ifndef __ASSEMBLY__
+#include <linux/psci.h>
 #include <asm/types.h>
 #include <asm/ptrace.h>
 
@@ -77,6 +78,7 @@ struct kvm_regs {
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
 
 struct kvm_vcpu_init {
 	__u32 target;
@@ -186,10 +188,10 @@ struct kvm_arch_memory_slot {
 #define KVM_PSCI_FN_CPU_ON		KVM_PSCI_FN(2)
 #define KVM_PSCI_FN_MIGRATE		KVM_PSCI_FN(3)
 
-#define KVM_PSCI_RET_SUCCESS		0
-#define KVM_PSCI_RET_NI			((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL		((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED		((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS		PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI			PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL		PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED		PSCI_RET_DENIED
 
 #endif
 

From c6debeb5f9cf9a1c1f8867d991563302797e20c3 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:18 +0530
Subject: [PATCH 168/287] ARM/ARM64: KVM: Make kvm_psci_call() return
 convention more flexible

Currently, the kvm_psci_call() returns 'true' or 'false' based on whether
the PSCI function call was handled successfully or not. This does not help
us emulate system-level PSCI functions where the actual emulation work will
be done by user space (QEMU or KVMTOOL). Examples of such system-level PSCI
functions are: PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET.

This patch updates kvm_psci_call() to return three types of values:
1) > 0 (success)
2) = 0 (success but exit to user space)
3) < 0 (errors)

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e8e7fcc5e2710b31ef842ee799db99c07986c364)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_psci.h   |  2 +-
 arch/arm/kvm/handle_exit.c        | 10 +++++++---
 arch/arm/kvm/psci.c               | 28 ++++++++++++++++------------
 arch/arm64/include/asm/kvm_psci.h |  2 +-
 arch/arm64/kvm/handle_exit.c      | 12 ++++++++----
 5 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 4c0e3e1d1597..6bda945d31fa 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -22,6 +22,6 @@
 #define KVM_ARM_PSCI_0_2	2
 
 int kvm_psci_version(struct kvm_vcpu *vcpu);
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 0de91fc6de0f..4c979d466cc1 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -38,14 +38,18 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
+	int ret;
+
 	trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
 		      kvm_vcpu_hvc_get_imm(vcpu));
 
-	if (kvm_psci_call(vcpu))
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
 		return 1;
+	}
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	return ret;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 8c42596cdbdf..14e6fa6c8e35 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -93,7 +93,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu)
 	return KVM_ARM_PSCI_0_1;
 }
 
-static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
@@ -128,14 +128,14 @@ static bool kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_NOT_SUPPORTED;
 		break;
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return 1;
 }
 
-static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 {
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
@@ -153,11 +153,11 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_NOT_SUPPORTED;
 		break;
 	default:
-		return false;
+		return -EINVAL;
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return true;
+	return 1;
 }
 
 /**
@@ -165,12 +165,16 @@ static bool kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
  * @vcpu: Pointer to the VCPU struct
  *
  * Handle PSCI calls from guests through traps from HVC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
  */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu)
 {
 	switch (kvm_psci_version(vcpu)) {
 	case KVM_ARM_PSCI_0_2:
@@ -178,6 +182,6 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
 	case KVM_ARM_PSCI_0_1:
 		return kvm_psci_0_1_call(vcpu);
 	default:
-		return false;
+		return -EINVAL;
 	};
 }
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
index e25c658a757b..bc39e557c56c 100644
--- a/arch/arm64/include/asm/kvm_psci.h
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -22,6 +22,6 @@
 #define KVM_ARM_PSCI_0_2	2
 
 int kvm_psci_version(struct kvm_vcpu *vcpu);
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
 
 #endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 7bc41eab4c64..182415e1a952 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -30,11 +30,15 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
+	int ret;
 
-	kvm_inject_undefined(vcpu);
-	return 1;
+	ret = kvm_psci_call(vcpu);
+	if (ret < 0) {
+		kvm_inject_undefined(vcpu);
+		return 1;
+	}
+
+	return ret;
 }
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)

From 7b0aa85882b902333c01f07bef88da07e5c53ebf Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:19 +0530
Subject: [PATCH 169/287] KVM: Add KVM_EXIT_SYSTEM_EVENT to user space API
 header

Currently, we don't have an exit reason to notify user space about
a system-level event (for e.g. system reset or shutdown) triggered
by the VCPU. This patch adds exit reason KVM_EXIT_SYSTEM_EVENT for
this purpose. We can also inform user space about the 'type' and
architecture specific 'flags' of a system-level event using the
kvm_run structure.

This newly added KVM_EXIT_SYSTEM_EVENT will be used by KVM ARM/ARM64
in-kernel PSCI v0.2 support to reset/shutdown VMs.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 8ad6b634928a25971dc42dce101808b1491f87ec)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 15 +++++++++++++++
 include/uapi/linux/kvm.h          |  8 ++++++++
 2 files changed, 23 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 3c75a17555a8..48e7888291f5 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2636,6 +2636,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
 external interrupt has just been delivered into the guest. User space
 should put the acknowledged interrupt vector into the 'epr' field.
 
+		/* KVM_EXIT_SYSTEM_EVENT */
+		struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN       1
+#define KVM_SYSTEM_EVENT_RESET          2
+			__u32 type;
+			__u64 flags;
+		} system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In case of ARM/ARM64, this is triggered using
+HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index cb7ebcc7a9db..0a70874e4a63 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -171,6 +171,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_WATCHDOG         21
 #define KVM_EXIT_S390_TSCH        22
 #define KVM_EXIT_EPR              23
+#define KVM_EXIT_SYSTEM_EVENT     24
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -301,6 +302,13 @@ struct kvm_run {
 		struct {
 			__u32 epr;
 		} epr;
+		/* KVM_EXIT_SYSTEM_EVENT */
+		struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN       1
+#define KVM_SYSTEM_EVENT_RESET          2
+			__u32 type;
+			__u64 flags;
+		} system_event;
 		/* Fix the size of the union. */
 		char padding[256];
 	};

From 2834b56a4051c66feb99c5546723fd6afa434d0e Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:20 +0530
Subject: [PATCH 170/287] ARM/ARM64: KVM: Emulate PSCI v0.2 SYSTEM_OFF and
 SYSTEM_RESET

The PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET functions are system-level
functions hence cannot be fully emulated by in-kernel PSCI emulation code.

To tackle this, we forward PSCI v0.2 SYSTEM_OFF and SYSTEM_RESET function
calls from vcpu to user space (i.e. QEMU or KVMTOOL) via kvm_run structure
using KVM_EXIT_SYSTEM_EVENT exit reasons.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4b1238269ed340d59ef829fd9c30a39cfb2923a8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 46 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 14e6fa6c8e35..59362131b79f 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -85,6 +85,23 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	return PSCI_RET_SUCCESS;
 }
 
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+	vcpu->run->system_event.type = type;
+	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+	kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
 int kvm_psci_version(struct kvm_vcpu *vcpu)
 {
 	if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
@@ -95,6 +112,7 @@ int kvm_psci_version(struct kvm_vcpu *vcpu)
 
 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 {
+	int ret = 1;
 	unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
 	unsigned long val;
 
@@ -114,13 +132,35 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	case PSCI_0_2_FN64_CPU_ON:
 		val = kvm_psci_vcpu_on(vcpu);
 		break;
+	case PSCI_0_2_FN_SYSTEM_OFF:
+		kvm_psci_system_off(vcpu);
+		/*
+		 * We should'nt be going back to guest VCPU after
+		 * receiving SYSTEM_OFF request.
+		 *
+		 * If user space accidently/deliberately resumes
+		 * guest VCPU after SYSTEM_OFF request then guest
+		 * VCPU should see internal failure from PSCI return
+		 * value. To achieve this, we preload r0 (or x0) with
+		 * PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
+	case PSCI_0_2_FN_SYSTEM_RESET:
+		kvm_psci_system_reset(vcpu);
+		/*
+		 * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+		 * with PSCI return value INTERNAL_FAILURE.
+		 */
+		val = PSCI_RET_INTERNAL_FAILURE;
+		ret = 0;
+		break;
 	case PSCI_0_2_FN_CPU_SUSPEND:
 	case PSCI_0_2_FN_AFFINITY_INFO:
 	case PSCI_0_2_FN_MIGRATE:
 	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
 	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
-	case PSCI_0_2_FN_SYSTEM_OFF:
-	case PSCI_0_2_FN_SYSTEM_RESET:
 	case PSCI_0_2_FN64_CPU_SUSPEND:
 	case PSCI_0_2_FN64_AFFINITY_INFO:
 	case PSCI_0_2_FN64_MIGRATE:
@@ -132,7 +172,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	}
 
 	*vcpu_reg(vcpu, 0) = val;
-	return 1;
+	return ret;
 }
 
 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)

From 4c7726b875f0ae0aa10b553d0ddf1662ca1c8536 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:21 +0530
Subject: [PATCH 171/287] ARM/ARM64: KVM: Emulate PSCI v0.2 AFFINITY_INFO

This patch adds emulation of PSCI v0.2 AFFINITY_INFO function call
for KVM ARM/ARM64. This is a VCPU-level function call which will be
used to determine current state of given affinity level.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit e6bc13c8a70eabc6a39098ccedf6129c734e3db3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 52 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 59362131b79f..3b6a0cf25c7d 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -27,6 +27,16 @@
  * as described in ARM document number ARM DEN 0022A.
  */
 
+#define AFFINITY_MASK(level)	~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+	if (affinity_level <= 3)
+		return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+	return 0;
+}
+
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.pause = true;
@@ -85,6 +95,42 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	return PSCI_RET_SUCCESS;
 }
 
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+	int i;
+	unsigned long mpidr;
+	unsigned long target_affinity;
+	unsigned long target_affinity_mask;
+	unsigned long lowest_affinity_level;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_vcpu *tmp;
+
+	target_affinity = *vcpu_reg(vcpu, 1);
+	lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+	/* Determine target affinity mask */
+	target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+	if (!target_affinity_mask)
+		return PSCI_RET_INVALID_PARAMS;
+
+	/* Ignore other bits of target affinity */
+	target_affinity &= target_affinity_mask;
+
+	/*
+	 * If one or more VCPU matching target affinity are running
+	 * then ON else OFF
+	 */
+	kvm_for_each_vcpu(i, tmp, kvm) {
+		mpidr = kvm_vcpu_get_mpidr(tmp);
+		if (((mpidr & target_affinity_mask) == target_affinity) &&
+		    !tmp->arch.pause) {
+			return PSCI_0_2_AFFINITY_LEVEL_ON;
+		}
+	}
+
+	return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
@@ -132,6 +178,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	case PSCI_0_2_FN64_CPU_ON:
 		val = kvm_psci_vcpu_on(vcpu);
 		break;
+	case PSCI_0_2_FN_AFFINITY_INFO:
+	case PSCI_0_2_FN64_AFFINITY_INFO:
+		val = kvm_psci_vcpu_affinity_info(vcpu);
+		break;
 	case PSCI_0_2_FN_SYSTEM_OFF:
 		kvm_psci_system_off(vcpu);
 		/*
@@ -157,12 +207,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		ret = 0;
 		break;
 	case PSCI_0_2_FN_CPU_SUSPEND:
-	case PSCI_0_2_FN_AFFINITY_INFO:
 	case PSCI_0_2_FN_MIGRATE:
 	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
 	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
 	case PSCI_0_2_FN64_CPU_SUSPEND:
-	case PSCI_0_2_FN64_AFFINITY_INFO:
 	case PSCI_0_2_FN64_MIGRATE:
 	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
 		val = PSCI_RET_NOT_SUPPORTED;

From f11d09d9c9ce2dcca77dd41fcb16c3811cc513d5 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:22 +0530
Subject: [PATCH 172/287] ARM/ARM64: KVM: Emulate PSCI v0.2 MIGRATE_INFO_TYPE
 and related functions

This patch adds emulation of PSCI v0.2 MIGRATE, MIGRATE_INFO_TYPE, and
MIGRATE_INFO_UP_CPU function calls for KVM ARM/ARM64.

KVM ARM/ARM64 being a hypervisor (and not a Trusted OS), we cannot provide
this functions hence we emulate these functions in following way:
1. MIGRATE - Returns "Not Supported"
2. MIGRATE_INFO_TYPE - Return 2 i.e. Trusted OS is not present
3. MIGRATE_INFO_UP_CPU - Returns "Not Supported"

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit bab0b43012a8ad64877fa46134370a7f5c6ce861)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 3b6a0cf25c7d..cce901a510fa 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -182,6 +182,22 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 	case PSCI_0_2_FN64_AFFINITY_INFO:
 		val = kvm_psci_vcpu_affinity_info(vcpu);
 		break;
+	case PSCI_0_2_FN_MIGRATE:
+	case PSCI_0_2_FN64_MIGRATE:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+		/*
+		 * Trusted OS is MP hence does not require migration
+	         * or
+		 * Trusted OS is not present
+		 */
+		val = PSCI_0_2_TOS_MP;
+		break;
+	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+		val = PSCI_RET_NOT_SUPPORTED;
+		break;
 	case PSCI_0_2_FN_SYSTEM_OFF:
 		kvm_psci_system_off(vcpu);
 		/*
@@ -207,12 +223,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		ret = 0;
 		break;
 	case PSCI_0_2_FN_CPU_SUSPEND:
-	case PSCI_0_2_FN_MIGRATE:
-	case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
-	case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
 	case PSCI_0_2_FN64_CPU_SUSPEND:
-	case PSCI_0_2_FN64_MIGRATE:
-	case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
 		val = PSCI_RET_NOT_SUPPORTED;
 		break;
 	default:

From 69590f71e0ef474e0d44514484308c2eb826306d Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:23 +0530
Subject: [PATCH 173/287] ARM/ARM64: KVM: Fix CPU_ON emulation for PSCI v0.2

As-per PSCI v0.2, the source CPU provides physical address of
"entry point" and "context id" for starting a target CPU. Also,
if target CPU is already running then we should return ALREADY_ON.

Current emulation of CPU_ON function does not consider physical
address of "context id" and returns INVALID_PARAMETERS if target
CPU is already running.

This patch updates kvm_psci_vcpu_on() such that it works for both
PSCI v0.1 and PSCI v0.2.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit aa8aeefe5e567637bbec7d7a3031cc057e3af303)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index cce901a510fa..1067579c7336 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -48,6 +48,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	struct kvm_vcpu *vcpu = NULL, *tmp;
 	wait_queue_head_t *wq;
 	unsigned long cpu_id;
+	unsigned long context_id;
 	unsigned long mpidr;
 	phys_addr_t target_pc;
 	int i;
@@ -68,10 +69,17 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	 * Make sure the caller requested a valid CPU and that the CPU is
 	 * turned off.
 	 */
-	if (!vcpu || !vcpu->arch.pause)
+	if (!vcpu)
 		return PSCI_RET_INVALID_PARAMS;
+	if (!vcpu->arch.pause) {
+		if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+			return PSCI_RET_ALREADY_ON;
+		else
+			return PSCI_RET_INVALID_PARAMS;
+	}
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
+	context_id = *vcpu_reg(source_vcpu, 3);
 
 	kvm_reset_vcpu(vcpu);
 
@@ -86,6 +94,11 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		kvm_vcpu_set_be(vcpu);
 
 	*vcpu_pc(vcpu) = target_pc;
+	/*
+	 * NOTE: We always update r0 (or x0) because for PSCI v0.1
+	 * the general puspose registers are undefined upon CPU_ON.
+	 */
+	*vcpu_reg(vcpu, 0) = context_id;
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 

From 6824eda31449e4585085a49739d60e640b1c7583 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:24 +0530
Subject: [PATCH 174/287] ARM/ARM64: KVM: Emulate PSCI v0.2 CPU_SUSPEND

This patch adds emulation of PSCI v0.2 CPU_SUSPEND function call for
KVM ARM/ARM64. This is a CPU-level function call which can suspend
current CPU or current CPU cluster. We don't have VCPU clusters in
KVM so we only suspend the current VCPU.

The CPU_SUSPEND emulation is not tested much because currently there
is no CPUIDLE driver in Linux kernel that uses PSCI CPU_SUSPEND. The
PSCI CPU_SUSPEND implementation in ARM64 kernel was tested using a
Simple CPUIDLE driver which is not published due to unstable DT-bindings
for PSCI.
(For more info, http://lwn.net/Articles/574950/)

For simplicity, we implement CPU_SUSPEND emulation similar to WFI
(Wait-for-interrupt) emulation and we also treat power-down request
to be same as stand-by request. This is consistent with section
5.4.1 and section 5.4.2 of PSCI v0.2 specification.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit b376d02b53b87f8684f91f13ba4ee43331850fcd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 1067579c7336..09cf37737ee2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -37,6 +37,26 @@ static unsigned long psci_affinity_mask(unsigned long affinity_level)
 	return 0;
 }
 
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * NOTE: For simplicity, we make VCPU suspend emulation to be
+	 * same-as WFI (Wait-for-interrupt) emulation.
+	 *
+	 * This means for KVM the wakeup events are interrupts and
+	 * this is consistent with intended use of StateID as described
+	 * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
+	 *
+	 * Further, we also treat power-down request to be same as
+	 * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2
+	 * specification (ARM DEN 0022A). This means all suspend states
+	 * for KVM will preserve the register state.
+	 */
+	kvm_vcpu_block(vcpu);
+
+	return PSCI_RET_SUCCESS;
+}
+
 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.pause = true;
@@ -183,6 +203,10 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		 */
 		val = 2;
 		break;
+	case PSCI_0_2_FN_CPU_SUSPEND:
+	case PSCI_0_2_FN64_CPU_SUSPEND:
+		val = kvm_psci_vcpu_suspend(vcpu);
+		break;
 	case PSCI_0_2_FN_CPU_OFF:
 		kvm_psci_vcpu_off(vcpu);
 		val = PSCI_RET_SUCCESS;
@@ -235,10 +259,6 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
 		val = PSCI_RET_INTERNAL_FAILURE;
 		ret = 0;
 		break;
-	case PSCI_0_2_FN_CPU_SUSPEND:
-	case PSCI_0_2_FN64_CPU_SUSPEND:
-		val = PSCI_RET_NOT_SUPPORTED;
-		break;
 	default:
 		return -EINVAL;
 	}

From f4aa5773a7ac1dad4bc6fc726b178547289e6045 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Tue, 29 Apr 2014 11:24:25 +0530
Subject: [PATCH 175/287] ARM/ARM64: KVM: Advertise KVM_CAP_ARM_PSCI_0_2 to
 user space

We have PSCI v0.2 emulation available in KVM ARM/ARM64
hence advertise this to user space (i.e. QEMU or KVMTOOL)
via KVM_CHECK_EXTENSION ioctl.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4447a208f7fc2e2dff8c6a8df2a1fd6dd72fb3e2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9804406ff37e..354dc42b6395 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -197,6 +197,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ARM_PSCI:
+	case KVM_CAP_ARM_PSCI_0_2:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:

From 5ab6e0ee8a2375651052456289f6d397f78f737e Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 16 Jan 2014 13:44:20 +0100
Subject: [PATCH 176/287] kvm/irqchip: Speed up KVM_SET_GSI_ROUTING

When starting lots of dataplane devices the bootup takes very long on
Christian's s390 with irqfd patches. With larger setups he is even
able to trigger some timeouts in some components.  Turns out that the
KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec)
when having multiple CPUs.  This is caused by the  synchronize_rcu and
the HZ=100 of s390.  By changing the code to use a private srcu we can
speed things up.  This patch reduces the boot time till mounting root
from 8 to 2 seconds on my s390 guest with 100 disks.

Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu
are fine because they do not have lockdep checks (hlist_for_each_entry_rcu
uses rcu_dereference_raw rather than rcu_dereference, and write-sides
do not do rcu lockdep at all).

Note that we're hardly relying on the "sleepable" part of srcu.  We just
want SRCU's faster detection of grace periods.

Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS
and RR.  The difference between results "before" and "after" the patch
has mean -0.2% and standard deviation 0.6%.  Using a paired t-test on the
data points says that there is a 2.5% probability that the patch is the
cause of the performance difference (rather than a random fluctuation).

(Restricting the t-test to RR, which is the most likely to be affected,
changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8%
probability that the numbers actually say something about the patch.
The probability increases mostly because there are fewer data points).

Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> # s390
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 719d93cd5f5c5c8775b7a38192069e8e1d1ac46e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/eventfd.c       | 25 +++++++++++++++----------
 virt/kvm/irq_comm.c      | 17 +++++++++--------
 virt/kvm/irqchip.c       | 31 ++++++++++++++++---------------
 virt/kvm/kvm_main.c      | 16 ++++++++++------
 5 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eef946f92c0c..a3c83491a791 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -346,6 +346,7 @@ struct kvm {
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
 	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 64ee720b75c7..b51c19ffd8fd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
+#include <linux/srcu.h>
 #include <linux/slab.h>
 
 #include "iodev.h"
@@ -118,19 +119,22 @@ static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
 	struct _irqfd_resampler *resampler;
+	struct kvm *kvm;
 	struct _irqfd *irqfd;
+	int idx;
 
 	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+	kvm = resampler->kvm;
 
-	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 		    resampler->notifier.gsi, 0, false);
 
-	rcu_read_lock();
+	idx = srcu_read_lock(&kvm->irq_srcu);
 
 	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
 		eventfd_signal(irqfd->resamplefd, 1);
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 static void
@@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 	mutex_lock(&kvm->irqfds.resampler_lock);
 
 	list_del_rcu(&irqfd->resampler_link);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 
 	if (list_empty(&resampler->list)) {
 		list_del(&resampler->link);
@@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	unsigned long flags = (unsigned long)key;
 	struct kvm_kernel_irq_routing_entry *irq;
 	struct kvm *kvm = irqfd->kvm;
+	int idx;
 
 	if (flags & POLLIN) {
-		rcu_read_lock();
-		irq = rcu_dereference(irqfd->irq_entry);
+		idx = srcu_read_lock(&kvm->irq_srcu);
+		irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
 			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
 		else
 			schedule_work(&irqfd->inject);
-		rcu_read_unlock();
+		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
 
 	if (flags & POLLHUP) {
@@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		}
 
 		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
-		synchronize_rcu();
+		synchronize_srcu(&kvm->irq_srcu);
 
 		mutex_unlock(&kvm->irqfds.resampler_lock);
 	}
@@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 			 * another thread calls kvm_irq_routing_update before
 			 * we flush workqueue below (we synchronize with
 			 * kvm_irq_routing_update using irqfds.lock).
-			 * It is paired with synchronize_rcu done by caller
+			 * It is paired with synchronize_srcu done by caller
 			 * of that function.
 			 */
 			rcu_assign_pointer(irqfd->irq_entry, NULL);
@@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
 
 /*
  * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
 void kvm_irq_routing_update(struct kvm *kvm,
 			    struct kvm_irq_routing_table *irq_rt)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..ced4a542a031 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
+	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * Since there's no easy way to do this, we only support injecting MSI
 	 * which is limited to 1:1 GSI mapping.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
 			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
@@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 				ret = -EWOULDBLOCK;
 			break;
 		}
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
 
@@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
-	int gsi;
+	int idx, gsi;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..b43c275775cd 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,6 +26,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/export.h>
 #include <trace/events/kvm.h>
 #include "irq.h"
@@ -33,19 +34,19 @@
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi) {
-				rcu_read_unlock();
+				srcu_read_unlock(&kvm->irq_srcu, idx);
 				return true;
 			}
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	return false;
 }
@@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi)
 				kian->irq_acked(kian);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 #ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
 #endif
@@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
 	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
+	int ret = -1, i = 0, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link)
 			irq_set[i++] = *e;
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
 		int r;
@@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
 
-	synchronize_rcu();
+	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
 	r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9f5fab0a4fda..17b107fa7a2b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -456,11 +456,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 	r = hardware_enable_all();
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -472,10 +472,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	r = -ENOMEM;
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
 	kvm_init_memslots_id(kvm);
 	if (init_srcu_struct(&kvm->srcu))
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
+	if (init_srcu_struct(&kvm->irq_srcu))
+		goto out_err_no_irq_srcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
 					GFP_KERNEL);
@@ -504,10 +506,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	return kvm;
 
 out_err:
+	cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
 	cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
 	hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);

From ccc916d9aac9d95baab922ab859f0ef37bb22558 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 2 May 2014 16:24:10 +0100
Subject: [PATCH 177/287] arm64: barriers: make use of barrier options with
 explicit barriers

When calling our low-level barrier macros directly, we can often suffice
with more relaxed behaviour than the default "all accesses, full system"
option.

This patch updates the users of dsb() to specify the option which they
actually require.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 98f7685ee69f871ba991089cb9685f0da07517ea)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 03244582bc55..c59a1bdab5eb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -71,13 +71,13 @@ static u32 get_ccsidr(u32 csselr)
 static void do_dc_cisw(u32 val)
 {
 	asm volatile("dc cisw, %x0" : : "r" (val));
-	dsb();
+	dsb(ish);
 }
 
 static void do_dc_csw(u32 val)
 {
 	asm volatile("dc csw, %x0" : : "r" (val));
-	dsb();
+	dsb(ish);
 }
 
 /* See note at ARM ARM B1.14.4 */

From 17acc52b78027a728c419a0d7b4d69d30b94294a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 2 May 2014 16:24:14 +0100
Subject: [PATCH 178/287] arm64: kvm: use inner-shareable barriers for
 inner-shareable maintenance

In order to ensure completion of inner-shareable maintenance instructions
(cache and TLB) on AArch64, we can use the -ish suffix to the dsb
instruction.

This patch relaxes our dsb sy instructions to dsb ish where possible.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit ee9e101c11478680d579bd20bb38a4d3e2514fe3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 2c56012cb2d2..b0d1512acf08 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -630,9 +630,15 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	 * whole of Stage-1. Weep...
 	 */
 	tlbi	ipas2e1is, x1
-	dsb	sy
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb	ish
 	tlbi	vmalle1is
-	dsb	sy
+	dsb	ish
 	isb
 
 	msr	vttbr_el2, xzr
@@ -643,7 +649,7 @@ ENTRY(__kvm_flush_vm_context)
 	dsb	ishst
 	tlbi	alle1is
 	ic	ialluis
-	dsb	sy
+	dsb	ish
 	ret
 ENDPROC(__kvm_flush_vm_context)
 

From 502f5822c7ac7b11040d4abb7ca1d09d56de251c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 20 May 2014 18:06:03 +0100
Subject: [PATCH 179/287] arm64: KVM: Enable minimalistic support for
 Cortex-A53

In order to allow KVM to run on Cortex-A53 implementations, wire the
minimal support required.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1252b3313642c3d0dff5b951b625468bf0dcd059)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/cputype.h     | 1 +
 arch/arm64/include/uapi/asm/kvm.h    | 3 ++-
 arch/arm64/kvm/guest.c               | 2 ++
 arch/arm64/kvm/sys_regs_generic_v8.c | 2 ++
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5fe138e0b828..343f7f737970 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,6 +41,7 @@
 
 #define ARM_CPU_PART_AEM_V8	0xD0F0
 #define ARM_CPU_PART_FOUNDATION	0xD000
+#define ARM_CPU_PART_CORTEX_A53	0xD030
 #define ARM_CPU_PART_CORTEX_A57	0xD070
 
 #define APM_CPU_PART_POTENZA	0x0000
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e6471daf3fb5..e633ff8cdec8 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -57,8 +57,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
 #define KVM_ARM_TARGET_XGENE_POTENZA	3
+#define KVM_ARM_TARGET_CORTEX_A53	4
 
-#define KVM_ARM_NUM_TARGETS		4
+#define KVM_ARM_NUM_TARGETS		5
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 08745578d54d..60b5c31f3c10 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -214,6 +214,8 @@ int __attribute_const__ kvm_target_cpu(void)
 			return KVM_ARM_TARGET_AEM_V8;
 		case ARM_CPU_PART_FOUNDATION:
 			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A53:
+			return KVM_ARM_TARGET_CORTEX_A53;
 		case ARM_CPU_PART_CORTEX_A57:
 			return KVM_ARM_TARGET_CORTEX_A57;
 		};
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 8fe6f76b0edc..475fd2929310 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -88,6 +88,8 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,

From c3bd5cf97635e0d04a2b0a7025d4a7512a6e6345 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 3 Jun 2014 13:44:17 +0200
Subject: [PATCH 180/287] KVM: add missing cleanup_srcu_struct

Reported-by: hrg <hrgstephen@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 820b3fcdeb80d30410f4427d2cbf9161c35fdeef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 17b107fa7a2b..3db56912caed 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -607,6 +607,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	kvm_free_physmem(kvm);
+	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
 	hardware_disable_all();

From 892f5c6841880062cbe947d08131c2c55613ae94 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 12 May 2014 16:05:13 +0200
Subject: [PATCH 181/287] KVM: prepare for KVM_(S|G)ET_MP_STATE on other
 architectures

Highlight the aspects of the ioctls that are actually specific to x86
and ia64. As defined restrictions (irqchip) and mp states may not apply
to other architectures, these parts are flagged to belong to x86 and ia64.

In preparation for the use of KVM_(S|G)ET_MP_STATE by s390.
Fix a spelling error (KVM_SET_MP_STATE vs. KVM_SET_MPSTATE) on the way.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
(cherry picked from commit 0b4820d6d8b6448bc9f7fac1bb1a801a53b425e1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt | 21 ++++++++++++---------
 include/uapi/linux/kvm.h          |  3 ++-
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 48e7888291f5..8d135672b69a 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -969,18 +969,20 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86, ia64]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal
+                                 which has not yet received an INIT signal [x86,
+                                 ia64]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI
+                                 now ready for a SIPI [x86, ia64]
  - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt
+                                 is waiting for an interrupt [x86, ia64]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS)
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
 
-This ioctl is only useful after KVM_CREATE_IRQCHIP.  Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
 
 
 4.39 KVM_SET_MP_STATE
@@ -994,8 +996,9 @@ Returns: 0 on success; -1 on error
 Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
 arguments.
 
-This ioctl is only useful after KVM_CREATE_IRQCHIP.  Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
 
 
 4.40 KVM_SET_IDENTITY_MAP_ADDR
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0a70874e4a63..521e4c0a08ac 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -399,8 +399,9 @@ struct kvm_vapic_addr {
 	__u64 vapic_addr;
 };
 
-/* for KVM_SET_MPSTATE */
+/* for KVM_SET_MP_STATE */
 
+/* not all states are valid on all architectures */
 #define KVM_MP_STATE_RUNNABLE          0
 #define KVM_MP_STATE_UNINITIALIZED     1
 #define KVM_MP_STATE_INIT_RECEIVED     2

From bdcfc46a6f2760588d3e59e2ff7abf235a2a513a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 9 May 2014 23:31:31 +0200
Subject: [PATCH 182/287] arm/arm64: KVM: Fix and refactor unmap_range

unmap_range() was utterly broken, to quote Marc, and broke in all sorts
of situations.  It was also quite complicated to follow and didn't
follow the usual scheme of having a separate iterating function for each
level of page tables.

Address this by refactoring the code and introduce a pgd_clear()
function.

Reviewed-by: Jungseok Lee <jays.lee@samsung.com>
Reviewed-by: Mario Smarduch <m.smarduch@samsung.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4f853a714bf16338ff5261128e6c7ae2569e9505)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   |  12 +++
 arch/arm/kvm/mmu.c               | 165 ++++++++++++++++---------------
 arch/arm64/include/asm/kvm_mmu.h |  15 +++
 3 files changed, 115 insertions(+), 77 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5c7aa3c1519f..5cc0b0f5f72f 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -127,6 +127,18 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
 })
 
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16f804938b8f..23360610aeac 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -90,104 +90,115 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static bool page_empty(void *ptr)
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-	struct page *ptr_page = virt_to_page(ptr);
-	return page_count(ptr_page) == 1;
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+	pgd_clear(pgd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
 }
 
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	if (pud_huge(*pud)) {
-		pud_clear(pud);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		pmd_t *pmd_table = pmd_offset(pud, 0);
-		pud_clear(pud);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-		pmd_free(NULL, pmd_table);
-	}
+	pmd_t *pmd_table = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
+	pud_clear(pud);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pmd_free(NULL, pmd_table);
 	put_page(virt_to_page(pud));
 }
 
 static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
-	if (kvm_pmd_huge(*pmd)) {
-		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	} else {
-		pte_t *pte_table = pte_offset_kernel(pmd, 0);
-		pmd_clear(pmd);
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-		pte_free_kernel(NULL, pte_table);
-	}
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(kvm_pmd_huge(*pmd));
+	pmd_clear(pmd);
+	kvm_tlb_flush_vmid_ipa(kvm, addr);
+	pte_free_kernel(NULL, pte_table);
 	put_page(virt_to_page(pmd));
 }
 
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	if (pte_present(*pte)) {
-		kvm_set_pte(pte, __pte(0));
-		put_page(virt_to_page(pte));
-		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	}
+	phys_addr_t start_addr = addr;
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+			kvm_tlb_flush_vmid_ipa(kvm, addr);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (kvm_pte_table_empty(start_pte))
+		clear_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-			unsigned long long start, u64 size)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+		       phys_addr_t addr, phys_addr_t end)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long long addr = start, end = start + size;
-	u64 next;
+	phys_addr_t next, start_addr = addr;
+	pmd_t *pmd, *start_pmd;
 
-	while (addr < end) {
-		pgd = pgdp + pgd_index(addr);
-		pud = pud_offset(pgd, addr);
-		pte = NULL;
-		if (pud_none(*pud)) {
-			addr = kvm_pud_addr_end(addr, end);
-			continue;
-		}
-
-		if (pud_huge(*pud)) {
-			/*
-			 * If we are dealing with a huge pud, just clear it and
-			 * move on.
-			 */
-			clear_pud_entry(kvm, pud, addr);
-			addr = kvm_pud_addr_end(addr, end);
-			continue;
-		}
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			addr = kvm_pmd_addr_end(addr, end);
-			continue;
-		}
-
-		if (!kvm_pmd_huge(*pmd)) {
-			pte = pte_offset_kernel(pmd, addr);
-			clear_pte_entry(kvm, pte, addr);
-			next = addr + PAGE_SIZE;
-		}
-
-		/*
-		 * If the pmd entry is to be cleared, walk back up the ladder
-		 */
-		if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
-			clear_pmd_entry(kvm, pmd, addr);
-			next = kvm_pmd_addr_end(addr, end);
-			if (page_empty(pmd) && !page_empty(pud)) {
-				clear_pud_entry(kvm, pud, addr);
-				next = kvm_pud_addr_end(addr, end);
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				pmd_clear(pmd);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pmd));
+			} else {
+				unmap_ptes(kvm, pmd, addr, next);
 			}
 		}
+	} while (pmd++, addr = next, addr != end);
 
-		addr = next;
-	}
+	if (kvm_pmd_table_empty(start_pmd))
+		clear_pud_entry(kvm, pud, start_addr);
+}
+
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+		       phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next, start_addr = addr;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				pud_clear(pud);
+				kvm_tlb_flush_vmid_ipa(kvm, addr);
+				put_page(virt_to_page(pud));
+			} else {
+				unmap_pmds(kvm, pud, addr, next);
+			}
+		}
+	} while (pud++, addr = next, addr != end);
+
+	if (kvm_pud_table_empty(start_pud))
+		clear_pgd_entry(kvm, pgd, start_addr);
+}
+
+
+static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
+			phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		unmap_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
 }
 
 static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7d29847a893b..8e138c7c53ac 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -125,6 +125,21 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 #define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
 #define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
 
+static inline bool kvm_page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#ifndef CONFIG_ARM64_64K_PAGES
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#else
+#define kvm_pmd_table_empty(pmdp) (0)
+#endif
+#define kvm_pud_table_empty(pudp) (0)
+
+
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))

From 332075bc83a401bc1a4deb1caeea1d45bd340866 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Fri, 6 Jun 2014 11:10:23 +0200
Subject: [PATCH 183/287] ARM: KVM: Unmap IPA on memslot delete/move

Currently when a KVM region is deleted or moved after
KVM_SET_USER_MEMORY_REGION ioctl, the corresponding
intermediate physical memory is not unmapped.

This patch corrects this and unmaps the region's IPA range
in kvm_arch_commit_memory_region using unmap_stage2_range.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit df6ce24f2ee485c4f9a5cb610063a5eb60da8267)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 37 -------------------------------------
 arch/arm/kvm/mmu.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 354dc42b6395..e2c2bfd4da95 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -155,16 +155,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
 
 /**
  * kvm_arch_destroy_vm - destroy the VM data structure
@@ -225,33 +215,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-				   struct kvm_memory_slot *memslot,
-				   struct kvm_userspace_memory_region *mem,
-				   enum kvm_mr_change change)
-{
-	return 0;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-				   struct kvm_userspace_memory_region *mem,
-				   const struct kvm_memory_slot *old,
-				   enum kvm_mr_change change)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-}
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 23360610aeac..b2a708be1407 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1111,3 +1111,49 @@ int kvm_mmu_init(void)
 	free_hyp_pgds();
 	return err;
 }
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+				   struct kvm_userspace_memory_region *mem,
+				   const struct kvm_memory_slot *old,
+				   enum kvm_mr_change change)
+{
+	gpa_t gpa = old->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = old->npages << PAGE_SHIFT;
+	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+		spin_lock(&kvm->mmu_lock);
+		unmap_stage2_range(kvm, gpa, size);
+		spin_unlock(&kvm->mmu_lock);
+	}
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+				   struct kvm_memory_slot *memslot,
+				   struct kvm_userspace_memory_region *mem,
+				   enum kvm_mr_change change)
+{
+	return 0;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+			   struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+			    unsigned long npages)
+{
+	return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+}

From c2ca19e2dc3f71d003a4ab691462d1df6974d00f Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@linaro.org>
Date: Thu, 26 Jun 2014 01:45:51 +0100
Subject: [PATCH 184/287] ARM: KVM: user_mem_abort: support stage 2 MMIO page
 mapping

A userspace process can map device MMIO memory via VFIO or /dev/mem,
e.g., for platform device passthrough support in QEMU.

During early development, we found the PAGE_S2 memory type being used
for MMIO mappings.  This patch corrects that by using the more strongly
ordered memory type for device MMIO mappings: PAGE_S2_DEVICE.

Signed-off-by: Kim Phillips <kim.phillips@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b88657674d39fc2127d62d0de9ca142e166443c8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b2a708be1407..16e7994bf347 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -759,6 +759,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
 	pfn_t pfn;
+	pgprot_t mem_type = PAGE_S2;
 
 	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -809,6 +810,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
+	if (kvm_is_mmio_pfn(pfn))
+		mem_type = PAGE_S2_DEVICE;
+
 	spin_lock(&kvm->mmu_lock);
 	if (mmu_notifier_retry(kvm, mmu_seq))
 		goto out_unlock;
@@ -816,7 +820,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
 	if (hugetlb) {
-		pmd_t new_pmd = pfn_pmd(pfn, PAGE_S2);
+		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
 		if (writable) {
 			kvm_set_s2pmd_writable(&new_pmd);
@@ -825,13 +829,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
-		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
+		pte_t new_pte = pfn_pte(pfn, mem_type);
 		if (writable) {
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
-		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
+		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
+				     mem_type == PAGE_S2_DEVICE);
 	}
 
 

From 896f51b90443e91d7358914df4a24d9c152c2b48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Tue, 1 Jul 2014 16:53:13 +0100
Subject: [PATCH 185/287] arm64: KVM: export demux regids as KVM_REG_ARM64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I suspect this is a -ECUTPASTE fault from the initial implementation. If
we don't declare the register ID to be KVM_REG_ARM64 the KVM_GET_ONE_REG
implementation kvm_arm_get_reg() returns -EINVAL and hilarity ensues.

The kvm/api.txt document describes all arm64 registers as starting with
0x60xx... (i.e KVM_REG_ARM64).

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit efd48ceacea78e4d4656aa0a6bf4c5b92ed22130)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c59a1bdab5eb..34f25a590bd7 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -962,7 +962,7 @@ static unsigned int num_demux_regs(void)
 
 static int write_demux_regids(u64 __user *uindices)
 {
-	u64 val = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+	u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
 	unsigned int i;
 
 	val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;

From c675c7cef37bad776609e6923fe64019c10e2945 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>
Date: Fri, 4 Jul 2014 15:54:14 +0100
Subject: [PATCH 186/287] arm64: KVM: allow export and import of generic timer
 regs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For correct guest suspend/resume behaviour we need to ensure we include
the generic timer registers for 64 bit guests. As CONFIG_KVM_ARM_TIMER is
always set for arm64 we don't need to worry about null implementations.
However I have re-jigged the kvm_arm_timer_set/get_reg declarations to
be in the common include/kvm/arm_arch_timer.h headers.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1df08ba0aa95f1a8832b7162eec51069bd9be7ae)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h |  3 --
 arch/arm/kvm/guest.c            | 10 -----
 arch/arm64/kvm/guest.c          | 68 ++++++++++++++++++++++++++++++++-
 include/kvm/arm_arch_timer.h    | 14 +++++++
 4 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 193ceaf01bfd..dc4e3edf39cc 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -228,7 +228,4 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
-int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
-
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index b23a59c1c522..986e625b5dbd 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -124,16 +124,6 @@ static bool is_timer_reg(u64 index)
 	return false;
 }
 
-int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
-	return 0;
-}
-
-u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
-	return 0;
-}
-
 #else
 
 #define NUM_TIMER_REGS 3
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 60b5c31f3c10..8d1ec2887a26 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -135,6 +135,59 @@ static unsigned long num_core_regs(void)
 	return sizeof(struct kvm_regs) / sizeof(__u32);
 }
 
+/**
+ * ARM64 versions of the TIMER registers, always available on arm64
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 /**
  * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
  *
@@ -142,7 +195,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu);
+	return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+                + NUM_TIMER_REGS;
 }
 
 /**
@@ -154,6 +208,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -161,6 +216,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
 }
 
@@ -174,6 +234,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_sys_reg_get_reg(vcpu, reg);
 }
 
@@ -187,6 +250,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_sys_reg_set_reg(vcpu, reg);
 }
 
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6d9aeddc09bf..ad9db6045b2f 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -67,6 +67,10 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
 #else
 static inline int kvm_timer_hyp_init(void)
 {
@@ -84,6 +88,16 @@ static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	return 0;
+}
+
+static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	return 0;
+}
 #endif
 
 #endif

From 7f124577d6247ae088a0acb0762297a5bdaeb9a3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 30 May 2013 10:20:36 +0100
Subject: [PATCH 187/287] KVM: arm/arm64: vgic: move GICv2 registers to their
 own structure

In order to make way for the GICv3 registers, move the v2-specific
registers to their own structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit eede821dbfd58df89edb072da64e006321eaef58)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kernel/asm-offsets.c   | 14 ++++-----
 arch/arm/kvm/interrupts_head.S  | 26 +++++++--------
 arch/arm64/kernel/asm-offsets.c | 14 ++++-----
 arch/arm64/kvm/hyp.S            | 26 +++++++--------
 include/kvm/arm_vgic.h          | 20 +++++++-----
 virt/kvm/arm/vgic.c             | 56 ++++++++++++++++-----------------
 6 files changed, 81 insertions(+), 75 deletions(-)

diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index aa2acc1dd986..776d9186e9c1 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -176,13 +176,13 @@ int main(void)
   DEFINE(VCPU_HYP_PC,		offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
 #ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
 #ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 76af93025574..e4eaf30205c5 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -421,14 +421,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r9, [r2, #GICH_ELRSR1]
 	ldr	r10, [r2, #GICH_APR]
 
-	str	r3, [r11, #VGIC_CPU_HCR]
-	str	r4, [r11, #VGIC_CPU_VMCR]
-	str	r5, [r11, #VGIC_CPU_MISR]
-	str	r6, [r11, #VGIC_CPU_EISR]
-	str	r7, [r11, #(VGIC_CPU_EISR + 4)]
-	str	r8, [r11, #VGIC_CPU_ELRSR]
-	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
-	str	r10, [r11, #VGIC_CPU_APR]
+	str	r3, [r11, #VGIC_V2_CPU_HCR]
+	str	r4, [r11, #VGIC_V2_CPU_VMCR]
+	str	r5, [r11, #VGIC_V2_CPU_MISR]
+	str	r6, [r11, #VGIC_V2_CPU_EISR]
+	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
+	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	r10, [r11, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
 	mov	r5, #0
@@ -436,7 +436,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Save list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r2], #4
 	str	r6, [r3], #4
@@ -463,9 +463,9 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r11, vcpu, #VCPU_VGIC_CPU
 
 	/* We only restore a minimal set of registers */
-	ldr	r3, [r11, #VGIC_CPU_HCR]
-	ldr	r4, [r11, #VGIC_CPU_VMCR]
-	ldr	r8, [r11, #VGIC_CPU_APR]
+	ldr	r3, [r11, #VGIC_V2_CPU_HCR]
+	ldr	r4, [r11, #VGIC_V2_CPU_VMCR]
+	ldr	r8, [r11, #VGIC_V2_CPU_APR]
 
 	str	r3, [r2, #GICH_HCR]
 	str	r4, [r2, #GICH_VMCR]
@@ -473,7 +473,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	/* Restore list registers */
 	add	r2, r2, #GICH_LR0
-	add	r3, r11, #VGIC_CPU_LR
+	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r3], #4
 	str	r6, [r2], #4
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 666e231d410b..dcfd8a616a94 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -127,13 +127,13 @@ int main(void)
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_CPU_HCR,		offsetof(struct vgic_cpu, vgic_hcr));
-  DEFINE(VGIC_CPU_VMCR,		offsetof(struct vgic_cpu, vgic_vmcr));
-  DEFINE(VGIC_CPU_MISR,		offsetof(struct vgic_cpu, vgic_misr));
-  DEFINE(VGIC_CPU_EISR,		offsetof(struct vgic_cpu, vgic_eisr));
-  DEFINE(VGIC_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_elrsr));
-  DEFINE(VGIC_CPU_APR,		offsetof(struct vgic_cpu, vgic_apr));
-  DEFINE(VGIC_CPU_LR,		offsetof(struct vgic_cpu, vgic_lr));
+  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b0d1512acf08..877d82a134bc 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -412,14 +412,14 @@ CPU_BE(	rev	w9,  w9  )
 CPU_BE(	rev	w10, w10 )
 CPU_BE(	rev	w11, w11 )
 
-	str	w4, [x3, #VGIC_CPU_HCR]
-	str	w5, [x3, #VGIC_CPU_VMCR]
-	str	w6, [x3, #VGIC_CPU_MISR]
-	str	w7, [x3, #VGIC_CPU_EISR]
-	str	w8, [x3, #(VGIC_CPU_EISR + 4)]
-	str	w9, [x3, #VGIC_CPU_ELRSR]
-	str	w10, [x3, #(VGIC_CPU_ELRSR + 4)]
-	str	w11, [x3, #VGIC_CPU_APR]
+	str	w4, [x3, #VGIC_V2_CPU_HCR]
+	str	w5, [x3, #VGIC_V2_CPU_VMCR]
+	str	w6, [x3, #VGIC_V2_CPU_MISR]
+	str	w7, [x3, #VGIC_V2_CPU_EISR]
+	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
 	str	wzr, [x2, #GICH_HCR]
@@ -427,7 +427,7 @@ CPU_BE(	rev	w11, w11 )
 	/* Save list registers */
 	add	x2, x2, #GICH_LR0
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_CPU_LR
+	add	x3, x3, #VGIC_V2_CPU_LR
 1:	ldr	w5, [x2], #4
 CPU_BE(	rev	w5, w5 )
 	str	w5, [x3], #4
@@ -452,9 +452,9 @@ CPU_BE(	rev	w5, w5 )
 	add	x3, x0, #VCPU_VGIC_CPU
 
 	/* We only restore a minimal set of registers */
-	ldr	w4, [x3, #VGIC_CPU_HCR]
-	ldr	w5, [x3, #VGIC_CPU_VMCR]
-	ldr	w6, [x3, #VGIC_CPU_APR]
+	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_V2_CPU_APR]
 CPU_BE(	rev	w4, w4 )
 CPU_BE(	rev	w5, w5 )
 CPU_BE(	rev	w6, w6 )
@@ -466,7 +466,7 @@ CPU_BE(	rev	w6, w6 )
 	/* Restore list registers */
 	add	x2, x2, #GICH_LR0
 	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_CPU_LR
+	add	x3, x3, #VGIC_V2_CPU_LR
 1:	ldr	w5, [x3], #4
 CPU_BE(	rev	w5, w5 )
 	str	w5, [x2], #4
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f27000f55a83..f738e5a69ee9 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -110,6 +110,16 @@ struct vgic_dist {
 #endif
 };
 
+struct vgic_v2_cpu_if {
+	u32		vgic_hcr;
+	u32		vgic_vmcr;
+	u32		vgic_misr;	/* Saved only */
+	u32		vgic_eisr[2];	/* Saved only */
+	u32		vgic_elrsr[2];	/* Saved only */
+	u32		vgic_apr;
+	u32		vgic_lr[VGIC_MAX_LRS];
+};
+
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
@@ -126,13 +136,9 @@ struct vgic_cpu {
 	int		nr_lr;
 
 	/* CPU vif control registers for world switch */
-	u32		vgic_hcr;
-	u32		vgic_vmcr;
-	u32		vgic_misr;	/* Saved only */
-	u32		vgic_eisr[2];	/* Saved only */
-	u32		vgic_elrsr[2];	/* Saved only */
-	u32		vgic_apr;
-	u32		vgic_lr[VGIC_MAX_LRS];
+	union {
+		struct vgic_v2_cpu_if	vgic_v2;
+	};
 #endif
 };
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 56ff9bebb577..0ba1ab0721fd 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -601,7 +601,7 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
 {
 	clear_bit(lr_nr, vgic_cpu->lr_used);
-	vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
+	vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE;
 	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
 }
 
@@ -626,7 +626,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 	u32 *lr;
 
 	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		lr = &vgic_cpu->vgic_lr[i];
+		lr = &vgic_cpu->vgic_v2.vgic_lr[i];
 		irq = LR_IRQID(*lr);
 		source_cpu = LR_CPUID(*lr);
 
@@ -1007,7 +1007,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	int lr;
 
 	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+		int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 		if (!vgic_irq_is_enabled(vcpu, irq)) {
 			vgic_retire_lr(lr, irq, vgic_cpu);
@@ -1037,11 +1037,11 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	/* Do we have an active interrupt for the same CPUID? */
 	if (lr != LR_EMPTY &&
-	    (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
+	    (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) {
 		kvm_debug("LR%d piggyback for IRQ%d %x\n",
-			  lr, irq, vgic_cpu->vgic_lr[lr]);
+			  lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]);
 		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
+		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT;
 		return true;
 	}
 
@@ -1052,12 +1052,12 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
+	vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
 	vgic_cpu->vgic_irq_lr_map[irq] = lr;
 	set_bit(lr, vgic_cpu->lr_used);
 
 	if (!vgic_irq_is_edge(vcpu, irq))
-		vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
+		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI;
 
 	return true;
 }
@@ -1155,9 +1155,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 epilog:
 	if (overflow) {
-		vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
+		vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE;
 	} else {
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 		/*
 		 * We're about to run this VCPU, and we've consumed
 		 * everything the distributor had in store for
@@ -1173,21 +1173,21 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	bool level_pending = false;
 
-	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
+	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr);
 
-	if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
+	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) {
 		/*
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
 		 */
 		int lr, irq;
 
-		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
+		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr,
 				 vgic_cpu->nr_lr) {
-			irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+			irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 			vgic_irq_clear_active(vcpu, irq);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
+			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI;
 
 			/* Any additional pending interrupt? */
 			if (vgic_dist_irq_is_pending(vcpu, irq)) {
@@ -1201,13 +1201,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 			 * Despite being EOIed, the LR may not have
 			 * been marked as empty.
 			 */
-			set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
-			vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
+			set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr);
+			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
 		}
 	}
 
-	if (vgic_cpu->vgic_misr & GICH_MISR_U)
-		vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
+	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U)
+		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 
 	return level_pending;
 }
@@ -1226,21 +1226,21 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	level_pending = vgic_process_maintenance(vcpu);
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
+	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
 			 vgic_cpu->nr_lr) {
 		int irq;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
 			continue;
 
-		irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
+		irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
 
 		BUG_ON(irq >= VGIC_NR_IRQS);
 		vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
+	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
 				      vgic_cpu->nr_lr);
 	if (level_pending || pending < vgic_cpu->nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
@@ -1436,10 +1436,10 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 * points to their reset values. Anything else resets to zero
 	 * anyway.
 	 */
-	vgic_cpu->vgic_vmcr = 0;
+	vgic_cpu->vgic_v2.vgic_vmcr = 0;
 
 	vgic_cpu->nr_lr = vgic_nr_lr;
-	vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
+	vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
 
 	return 0;
 }
@@ -1746,15 +1746,15 @@ static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
 	}
 
 	if (!mmio->is_write) {
-		reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
+		reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift;
 		mmio_data_write(mmio, ~0, reg);
 	} else {
 		reg = mmio_data_read(mmio, ~0);
 		reg = (reg << shift) & mask;
-		if (reg != (vgic_cpu->vgic_vmcr & mask))
+		if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask))
 			updated = true;
-		vgic_cpu->vgic_vmcr &= ~mask;
-		vgic_cpu->vgic_vmcr |= reg;
+		vgic_cpu->vgic_v2.vgic_vmcr &= ~mask;
+		vgic_cpu->vgic_v2.vgic_vmcr |= reg;
 	}
 	return updated;
 }

From 9f92df7009267f1a105e1338def7e99da648417c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 3 Jun 2013 15:55:02 +0100
Subject: [PATCH 188/287] KVM: ARM: vgic: introduce vgic_ops and LR
 manipulation primitives

In order to split the various register manipulation from the main vgic
code, introduce a vgic_ops structure, and start by abstracting the
LR manipulation code with a couple of accessors.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 8d5c6b06a5d5f8ebcf40558e566781d572920740)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  18 +++++
 virt/kvm/arm/vgic.c    | 162 ++++++++++++++++++++++++++++-------------
 2 files changed, 128 insertions(+), 52 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f738e5a69ee9..17bbe51b79a1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -68,6 +68,24 @@ struct vgic_bytemap {
 	u32 shared[VGIC_NR_SHARED_IRQS  / 4];
 };
 
+struct kvm_vcpu;
+
+#define LR_STATE_PENDING	(1 << 0)
+#define LR_STATE_ACTIVE		(1 << 1)
+#define LR_STATE_MASK		(3 << 0)
+#define LR_EOI_INT		(1 << 2)
+
+struct vgic_lr {
+	u16	irq;
+	u8	source;
+	u8	state;
+};
+
+struct vgic_ops {
+	struct vgic_lr	(*get_lr)(const struct kvm_vcpu *, int);
+	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+};
+
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0ba1ab0721fd..11408fee600e 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -94,9 +94,12 @@ static struct device_node *vgic_node;
 #define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static u32 vgic_nr_lr;
 
 static unsigned int vgic_maint_irq;
@@ -593,18 +596,6 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
 	return false;
 }
 
-#define LR_CPUID(lr)	\
-	(((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define LR_IRQID(lr)	\
-	((lr) & GICH_LR_VIRTUALID)
-
-static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
-{
-	clear_bit(lr_nr, vgic_cpu->lr_used);
-	vgic_cpu->vgic_v2.vgic_lr[lr_nr] &= ~GICH_LR_STATE;
-	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-}
-
 /**
  * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
@@ -622,13 +613,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	int vcpu_id = vcpu->vcpu_id;
-	int i, irq, source_cpu;
-	u32 *lr;
+	int i;
 
 	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		lr = &vgic_cpu->vgic_v2.vgic_lr[i];
-		irq = LR_IRQID(*lr);
-		source_cpu = LR_CPUID(*lr);
+		struct vgic_lr lr = vgic_get_lr(vcpu, i);
 
 		/*
 		 * There are three options for the state bits:
@@ -640,7 +628,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * If the LR holds only an active interrupt (not pending) then
 		 * just leave it alone.
 		 */
-		if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+		if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
 			continue;
 
 		/*
@@ -649,18 +637,19 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * is fine, then we are only setting a few bits that were
 		 * already set.
 		 */
-		vgic_dist_irq_set(vcpu, irq);
-		if (irq < VGIC_NR_SGIS)
-			dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
-		*lr &= ~GICH_LR_PENDING_BIT;
+		vgic_dist_irq_set(vcpu, lr.irq);
+		if (lr.irq < VGIC_NR_SGIS)
+			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
+		lr.state &= ~LR_STATE_PENDING;
+		vgic_set_lr(vcpu, i, lr);
 
 		/*
 		 * If there's no state left on the LR (it could still be
 		 * active), then the LR does not hold any useful info and can
 		 * be marked as free for other use.
 		 */
-		if (!(*lr & GICH_LR_STATE))
-			vgic_retire_lr(i, irq, vgic_cpu);
+		if (!(lr.state & LR_STATE_MASK))
+			vgic_retire_lr(i, lr.irq, vcpu);
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);
@@ -989,8 +978,69 @@ static void vgic_update_state(struct kvm *kvm)
 	}
 }
 
-#define MK_LR_PEND(src, irq)	\
-	(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	struct vgic_lr lr_desc;
+	u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+	lr_desc.irq	= val & GICH_LR_VIRTUALID;
+	if (lr_desc.irq <= 15)
+		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+	else
+		lr_desc.source = 0;
+	lr_desc.state	= 0;
+
+	if (val & GICH_LR_PENDING_BIT)
+		lr_desc.state |= LR_STATE_PENDING;
+	if (val & GICH_LR_ACTIVE_BIT)
+		lr_desc.state |= LR_STATE_ACTIVE;
+	if (val & GICH_LR_EOI)
+		lr_desc.state |= LR_EOI_INT;
+
+	return lr_desc;
+}
+
+static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+			   struct vgic_lr lr_desc)
+{
+	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+
+	if (lr_desc.state & LR_STATE_PENDING)
+		lr_val |= GICH_LR_PENDING_BIT;
+	if (lr_desc.state & LR_STATE_ACTIVE)
+		lr_val |= GICH_LR_ACTIVE_BIT;
+	if (lr_desc.state & LR_EOI_INT)
+		lr_val |= GICH_LR_EOI;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+}
+
+static const struct vgic_ops vgic_ops = {
+	.get_lr			= vgic_v2_get_lr,
+	.set_lr			= vgic_v2_set_lr,
+};
+
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	return vgic_ops.get_lr(vcpu, lr);
+}
+
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
+			       struct vgic_lr vlr)
+{
+	vgic_ops.set_lr(vcpu, lr, vlr);
+}
+
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+
+	vlr.state = 0;
+	vgic_set_lr(vcpu, lr_nr, vlr);
+	clear_bit(lr_nr, vgic_cpu->lr_used);
+	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
 
 /*
  * An interrupt may have been disabled after being made pending on the
@@ -1007,12 +1057,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	int lr;
 
 	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
-		int irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
+		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-		if (!vgic_irq_is_enabled(vcpu, irq)) {
-			vgic_retire_lr(lr, irq, vgic_cpu);
-			if (vgic_irq_is_active(vcpu, irq))
-				vgic_irq_clear_active(vcpu, irq);
+		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
+			vgic_retire_lr(lr, vlr.irq, vcpu);
+			if (vgic_irq_is_active(vcpu, vlr.irq))
+				vgic_irq_clear_active(vcpu, vlr.irq);
 		}
 	}
 }
@@ -1024,6 +1074,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_lr vlr;
 	int lr;
 
 	/* Sanitize the input... */
@@ -1036,13 +1087,15 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 	lr = vgic_cpu->vgic_irq_lr_map[irq];
 
 	/* Do we have an active interrupt for the same CPUID? */
-	if (lr != LR_EMPTY &&
-	    (LR_CPUID(vgic_cpu->vgic_v2.vgic_lr[lr]) == sgi_source_id)) {
-		kvm_debug("LR%d piggyback for IRQ%d %x\n",
-			  lr, irq, vgic_cpu->vgic_v2.vgic_lr[lr]);
-		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_PENDING_BIT;
-		return true;
+	if (lr != LR_EMPTY) {
+		vlr = vgic_get_lr(vcpu, lr);
+		if (vlr.source == sgi_source_id) {
+			kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
+			BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
+			vlr.state |= LR_STATE_PENDING;
+			vgic_set_lr(vcpu, lr, vlr);
+			return true;
+		}
 	}
 
 	/* Try to use another LR for this interrupt */
@@ -1052,12 +1105,16 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_v2.vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
 	vgic_cpu->vgic_irq_lr_map[irq] = lr;
 	set_bit(lr, vgic_cpu->lr_used);
 
+	vlr.irq = irq;
+	vlr.source = sgi_source_id;
+	vlr.state = LR_STATE_PENDING;
 	if (!vgic_irq_is_edge(vcpu, irq))
-		vgic_cpu->vgic_v2.vgic_lr[lr] |= GICH_LR_EOI;
+		vlr.state |= LR_EOI_INT;
+
+	vgic_set_lr(vcpu, lr, vlr);
 
 	return true;
 }
@@ -1180,21 +1237,23 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
 		 */
-		int lr, irq;
+		int lr;
 
 		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr,
 				 vgic_cpu->nr_lr) {
-			irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
+			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-			vgic_irq_clear_active(vcpu, irq);
-			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_EOI;
+			vgic_irq_clear_active(vcpu, vlr.irq);
+			WARN_ON(vlr.state & LR_STATE_MASK);
+			vlr.state = 0;
+			vgic_set_lr(vcpu, lr, vlr);
 
 			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_is_pending(vcpu, irq)) {
-				vgic_cpu_irq_set(vcpu, irq);
+			if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+				vgic_cpu_irq_set(vcpu, vlr.irq);
 				level_pending = true;
 			} else {
-				vgic_cpu_irq_clear(vcpu, irq);
+				vgic_cpu_irq_clear(vcpu, vlr.irq);
 			}
 
 			/*
@@ -1202,7 +1261,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 			 * been marked as empty.
 			 */
 			set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr);
-			vgic_cpu->vgic_v2.vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
 		}
 	}
 
@@ -1228,15 +1286,15 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	/* Clear mappings for empty LRs */
 	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
 			 vgic_cpu->nr_lr) {
-		int irq;
+		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
 			continue;
 
-		irq = vgic_cpu->vgic_v2.vgic_lr[lr] & GICH_LR_VIRTUALID;
+		vlr = vgic_get_lr(vcpu, lr);
 
-		BUG_ON(irq >= VGIC_NR_IRQS);
-		vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+		BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
 	}
 
 	/* Check if we still have something up our sleeve... */

From 32d98f25d20daedfa49c32a73e57630bdc0e74a0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 10:29:39 +0100
Subject: [PATCH 189/287] KVM: ARM: vgic: abstract access to the ELRSR bitmap

Move the GICH_ELRSR access to its own functions, and add them to
the vgic_ops structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 69bb2c9fbc11d9d4358fbb798db15c9092eb4d8c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  2 ++
 virt/kvm/arm/vgic.c    | 46 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 17bbe51b79a1..38864f5e47bc 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -84,6 +84,8 @@ struct vgic_lr {
 struct vgic_ops {
 	struct vgic_lr	(*get_lr)(const struct kvm_vcpu *, int);
 	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
+	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 11408fee600e..6dcc974fa2b4 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1015,9 +1015,32 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
 }
 
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+				  struct vgic_lr lr_desc)
+{
+	if (!(lr_desc.state & LR_STATE_MASK))
+		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+#endif
+	return val;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
+	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
+	.get_elrsr		= vgic_v2_get_elrsr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1031,6 +1054,17 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vgic_ops.set_lr(vcpu, lr, vlr);
 }
 
+static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+			       struct vgic_lr vlr)
+{
+	vgic_ops.sync_lr_elrsr(vcpu, lr, vlr);
+}
+
+static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
+{
+	return vgic_ops.get_elrsr(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1260,7 +1294,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 			 * Despite being EOIed, the LR may not have
 			 * been marked as empty.
 			 */
-			set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr);
+			vgic_sync_lr_elrsr(vcpu, lr, vlr);
 		}
 	}
 
@@ -1278,14 +1312,17 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	u64 elrsr;
+	unsigned long *elrsr_ptr;
 	int lr, pending;
 	bool level_pending;
 
 	level_pending = vgic_process_maintenance(vcpu);
+	elrsr = vgic_get_elrsr(vcpu);
+	elrsr_ptr = (unsigned long *)&elrsr;
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
-			 vgic_cpu->nr_lr) {
+	for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) {
 		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1298,8 +1335,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_v2.vgic_elrsr,
-				      vgic_cpu->nr_lr);
+	pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr);
 	if (level_pending || pending < vgic_cpu->nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }

From b874bb5cb4795de150d1157a0cf10138122cc093 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 10:33:43 +0100
Subject: [PATCH 190/287] KVM: ARM: vgic: abstract EISR bitmap access

Move the GICH_EISR access to its own function.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 8d6a0313c125c3c7b208b75695fe6ab00afab4c5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  1 +
 virt/kvm/arm/vgic.c    | 25 +++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 38864f5e47bc..ccb9b59818f4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -86,6 +86,7 @@ struct vgic_ops {
 	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
 	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
+	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6dcc974fa2b4..1e857e6e66b5 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1036,11 +1036,26 @@ static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
 	return val;
 }
 
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+#endif
+	return val;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
 	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
+	.get_eisr		= vgic_v2_get_eisr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1065,6 +1080,11 @@ static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 	return vgic_ops.get_elrsr(vcpu);
 }
 
+static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
+{
+	return vgic_ops.get_eisr(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1271,10 +1291,11 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
 		 */
+		u64 eisr = vgic_get_eisr(vcpu);
+		unsigned long *eisr_ptr = (unsigned long *)&eisr;
 		int lr;
 
-		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_v2.vgic_eisr,
-				 vgic_cpu->nr_lr) {
+		for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 			vgic_irq_clear_active(vcpu, vlr.irq);

From 3f79fb5a583821ca4c97d1b694285d36697e69c9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 11:02:10 +0100
Subject: [PATCH 191/287] KVM: ARM: vgic: abstract MISR decoding

Instead of directly dealing with the GICH_MISR bits, move the code to
its own function and use a couple of public flags to represent the
actual state.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 495dd859f304689a7cd5ef413c439cb090dc25e6)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++++
 virt/kvm/arm/vgic.c    | 26 +++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ccb9b59818f4..4857508b12e7 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -87,6 +87,7 @@ struct vgic_ops {
 	void	(*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
+	u32	(*get_interrupt_status)(const struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
@@ -165,6 +166,9 @@ struct vgic_cpu {
 
 #define LR_EMPTY	0xff
 
+#define INT_STATUS_EOI		(1 << 0)
+#define INT_STATUS_UNDERFLOW	(1 << 1)
+
 struct kvm;
 struct kvm_vcpu;
 struct kvm_run;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 1e857e6e66b5..c0bcc9735424 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1050,12 +1050,26 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
 	return val;
 }
 
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+	u32 ret = 0;
+
+	if (misr & GICH_MISR_EOI)
+		ret |= INT_STATUS_EOI;
+	if (misr & GICH_MISR_U)
+		ret |= INT_STATUS_UNDERFLOW;
+
+	return ret;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
 	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
+	.get_interrupt_status	= vgic_v2_get_interrupt_status,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1085,6 +1099,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
 	return vgic_ops.get_eisr(vcpu);
 }
 
+static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+{
+	return vgic_ops.get_interrupt_status(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1282,11 +1301,12 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u32 status = vgic_get_interrupt_status(vcpu);
 	bool level_pending = false;
 
-	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_v2.vgic_misr);
+	kvm_debug("STATUS = %08x\n", status);
 
-	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_EOI) {
+	if (status & INT_STATUS_EOI) {
 		/*
 		 * Some level interrupts have been EOIed. Clear their
 		 * active bit.
@@ -1319,7 +1339,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (vgic_cpu->vgic_v2.vgic_misr & GICH_MISR_U)
+	if (status & INT_STATUS_UNDERFLOW)
 		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 
 	return level_pending;

From 900f2c6eeaaa0daf752a06d654fed8d98d626f9f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 11:24:17 +0100
Subject: [PATCH 192/287] KVM: ARM: vgic: move underflow handling to vgic_ops

Move the code dealing with LR underflow handling to its own functions,
and make them accessible through vgic_ops.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 909d9b5025f149af6cfc304a76ad6218e6622cc0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  2 ++
 virt/kvm/arm/vgic.c    | 28 +++++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4857508b12e7..cdfa5d9567c6 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -88,6 +88,8 @@ struct vgic_ops {
 	u64	(*get_elrsr)(const struct kvm_vcpu *vcpu);
 	u64	(*get_eisr)(const struct kvm_vcpu *vcpu);
 	u32	(*get_interrupt_status)(const struct kvm_vcpu *vcpu);
+	void	(*enable_underflow)(struct kvm_vcpu *vcpu);
+	void	(*disable_underflow)(struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index c0bcc9735424..6d618e0b08a1 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1063,6 +1063,16 @@ static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
@@ -1070,6 +1080,8 @@ static const struct vgic_ops vgic_ops = {
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
 	.get_interrupt_status	= vgic_v2_get_interrupt_status,
+	.enable_underflow	= vgic_v2_enable_underflow,
+	.disable_underflow	= vgic_v2_disable_underflow,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1104,6 +1116,16 @@ static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
 	return vgic_ops.get_interrupt_status(vcpu);
 }
 
+static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vgic_ops.enable_underflow(vcpu);
+}
+
+static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vgic_ops.disable_underflow(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1285,9 +1307,9 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 epilog:
 	if (overflow) {
-		vgic_cpu->vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+		vgic_enable_underflow(vcpu);
 	} else {
-		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+		vgic_disable_underflow(vcpu);
 		/*
 		 * We're about to run this VCPU, and we've consumed
 		 * everything the distributor had in store for
@@ -1340,7 +1362,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 	}
 
 	if (status & INT_STATUS_UNDERFLOW)
-		vgic_cpu->vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+		vgic_disable_underflow(vcpu);
 
 	return level_pending;
 }

From 32a0e35ba5210fab6b6379971bba24886b69952a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Feb 2014 17:48:10 +0000
Subject: [PATCH 193/287] KVM: ARM: vgic: abstract VMCR access

Instead of directly messing with with the GICH_VMCR bits for the CPU
interface save/restore code, add accessors that encode/decode the
entire set of registers exposed by VMCR.

Not the most efficient thing, but given that this code is only used
by the save/restore code, performance is far from being critical.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit beee38b9d0c0ea6cf2a7f35c3108f7d8281d4545)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  9 ++++++
 virt/kvm/arm/vgic.c    | 69 ++++++++++++++++++++++++++++++++----------
 2 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index cdfa5d9567c6..f51580043170 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -81,6 +81,13 @@ struct vgic_lr {
 	u8	state;
 };
 
+struct vgic_vmcr {
+	u32	ctlr;
+	u32	abpr;
+	u32	bpr;
+	u32	pmr;
+};
+
 struct vgic_ops {
 	struct vgic_lr	(*get_lr)(const struct kvm_vcpu *, int);
 	void	(*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
@@ -90,6 +97,8 @@ struct vgic_ops {
 	u32	(*get_interrupt_status)(const struct kvm_vcpu *vcpu);
 	void	(*enable_underflow)(struct kvm_vcpu *vcpu);
 	void	(*disable_underflow)(struct kvm_vcpu *vcpu);
+	void	(*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+	void	(*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6d618e0b08a1..5c706393956d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -100,8 +100,10 @@ static void vgic_kick_vcpus(struct kvm *kvm);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
-static u32 vgic_nr_lr;
+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
+static u32 vgic_nr_lr;
 static unsigned int vgic_maint_irq;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
@@ -1073,6 +1075,28 @@ static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
 }
 
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr;
+
+	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
@@ -1082,6 +1106,8 @@ static const struct vgic_ops vgic_ops = {
 	.get_interrupt_status	= vgic_v2_get_interrupt_status,
 	.enable_underflow	= vgic_v2_enable_underflow,
 	.disable_underflow	= vgic_v2_disable_underflow,
+	.get_vmcr		= vgic_v2_get_vmcr,
+	.set_vmcr		= vgic_v2_set_vmcr,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1126,6 +1152,16 @@ static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
 	vgic_ops.disable_underflow(vcpu);
 }
 
+static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+	vgic_ops.get_vmcr(vcpu, vmcr);
+}
+
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+	vgic_ops.set_vmcr(vcpu, vmcr);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1879,39 +1915,40 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
 static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
 				 struct kvm_exit_mmio *mmio, phys_addr_t offset)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	u32 reg, mask = 0, shift = 0;
 	bool updated = false;
+	struct vgic_vmcr vmcr;
+	u32 *vmcr_field;
+	u32 reg;
+
+	vgic_get_vmcr(vcpu, &vmcr);
 
 	switch (offset & ~0x3) {
 	case GIC_CPU_CTRL:
-		mask = GICH_VMCR_CTRL_MASK;
-		shift = GICH_VMCR_CTRL_SHIFT;
+		vmcr_field = &vmcr.ctlr;
 		break;
 	case GIC_CPU_PRIMASK:
-		mask = GICH_VMCR_PRIMASK_MASK;
-		shift = GICH_VMCR_PRIMASK_SHIFT;
+		vmcr_field = &vmcr.pmr;
 		break;
 	case GIC_CPU_BINPOINT:
-		mask = GICH_VMCR_BINPOINT_MASK;
-		shift = GICH_VMCR_BINPOINT_SHIFT;
+		vmcr_field = &vmcr.bpr;
 		break;
 	case GIC_CPU_ALIAS_BINPOINT:
-		mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
-		shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+		vmcr_field = &vmcr.abpr;
 		break;
+	default:
+		BUG();
 	}
 
 	if (!mmio->is_write) {
-		reg = (vgic_cpu->vgic_v2.vgic_vmcr & mask) >> shift;
+		reg = *vmcr_field;
 		mmio_data_write(mmio, ~0, reg);
 	} else {
 		reg = mmio_data_read(mmio, ~0);
-		reg = (reg << shift) & mask;
-		if (reg != (vgic_cpu->vgic_v2.vgic_vmcr & mask))
+		if (reg != *vmcr_field) {
+			*vmcr_field = reg;
+			vgic_set_vmcr(vcpu, &vmcr);
 			updated = true;
-		vgic_cpu->vgic_v2.vgic_vmcr &= ~mask;
-		vgic_cpu->vgic_v2.vgic_vmcr |= reg;
+		}
 	}
 	return updated;
 }

From 6d6775fa71660fd7c58741881d6bbbfd2be68f3a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Jun 2013 11:36:38 +0100
Subject: [PATCH 194/287] KVM: ARM: vgic: introduce vgic_enable

Move the code dealing with enabling the VGIC on to vgic_ops.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit da8dafd1777cdd93091207952297d221a88e6479)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  1 +
 virt/kvm/arm/vgic.c    | 29 +++++++++++++++++++++--------
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f51580043170..2228973ea8e4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -99,6 +99,7 @@ struct vgic_ops {
 	void	(*disable_underflow)(struct kvm_vcpu *vcpu);
 	void	(*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 	void	(*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+	void	(*enable)(struct kvm_vcpu *vcpu);
 };
 
 struct vgic_dist {
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 5c706393956d..70f674bb13a1 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1097,6 +1097,19 @@ static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
 }
 
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * By forcing VMCR to zero, the GIC will restore the binary
+	 * points to their reset values. Anything else resets to zero
+	 * anyway.
+	 */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+	/* Get the show on the road... */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
 static const struct vgic_ops vgic_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
@@ -1108,6 +1121,7 @@ static const struct vgic_ops vgic_ops = {
 	.disable_underflow	= vgic_v2_disable_underflow,
 	.get_vmcr		= vgic_v2_get_vmcr,
 	.set_vmcr		= vgic_v2_set_vmcr,
+	.enable			= vgic_v2_enable,
 };
 
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -1162,6 +1176,11 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 	vgic_ops.set_vmcr(vcpu, vmcr);
 }
 
+static inline void vgic_enable(struct kvm_vcpu *vcpu)
+{
+	vgic_ops.enable(vcpu);
+}
+
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1624,15 +1643,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
 	}
 
-	/*
-	 * By forcing VMCR to zero, the GIC will restore the binary
-	 * points to their reset values. Anything else resets to zero
-	 * anyway.
-	 */
-	vgic_cpu->vgic_v2.vgic_vmcr = 0;
-
 	vgic_cpu->nr_lr = vgic_nr_lr;
-	vgic_cpu->vgic_v2.vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
+
+	vgic_enable(vcpu);
 
 	return 0;
 }

From 6e707c119719a9261ac7305194c0004d11bb594a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 18 Jun 2013 19:17:28 +0100
Subject: [PATCH 195/287] KVM: ARM: introduce vgic_params structure

Move all the data specific to a given GIC implementation into its own
little structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ca85f623e37d096206e092ef037a145a60fa7f85)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 11 +++++++
 virt/kvm/arm/vgic.c    | 70 ++++++++++++++++++++----------------------
 2 files changed, 44 insertions(+), 37 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2228973ea8e4..ce2e14226dbf 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -102,6 +102,17 @@ struct vgic_ops {
 	void	(*enable)(struct kvm_vcpu *vcpu);
 };
 
+struct vgic_params {
+	/* Physical address of vgic virtual cpu interface */
+	phys_addr_t	vcpu_base;
+	/* Number of list registers */
+	u32		nr_lr;
+	/* Interrupt number */
+	unsigned int	maint_irq;
+	/* Virtual control interface base address */
+	void __iomem	*vctrl_base;
+};
+
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 70f674bb13a1..f3a996d0a100 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -76,14 +76,6 @@
 #define IMPLEMENTER_ARM		0x43b
 #define GICC_ARCH_VERSION_V2	0x2
 
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
 #define ACCESS_READ_VALUE	(1 << 0)
 #define ACCESS_READ_RAZ		(0 << 0)
 #define ACCESS_READ_MASK(x)	((x) & (1 << 0))
@@ -103,8 +95,7 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
-static u32 vgic_nr_lr;
-static unsigned int vgic_maint_irq;
+static struct vgic_params vgic;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
@@ -1206,7 +1197,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	int lr;
 
-	for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+	for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) {
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
@@ -1250,8 +1241,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	/* Try to use another LR for this interrupt */
 	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic_cpu->nr_lr);
-	if (lr >= vgic_cpu->nr_lr)
+			       vgic.nr_lr);
+	if (lr >= vgic.nr_lr)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
@@ -1377,7 +1368,6 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	u32 status = vgic_get_interrupt_status(vcpu);
 	bool level_pending = false;
 
@@ -1392,7 +1382,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		unsigned long *eisr_ptr = (unsigned long *)&eisr;
 		int lr;
 
-		for_each_set_bit(lr, eisr_ptr, vgic_cpu->nr_lr) {
+		for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 			vgic_irq_clear_active(vcpu, vlr.irq);
@@ -1440,7 +1430,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	elrsr_ptr = (unsigned long *)&elrsr;
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, elrsr_ptr, vgic_cpu->nr_lr) {
+	for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) {
 		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1453,8 +1443,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit(elrsr_ptr, vgic_cpu->nr_lr);
-	if (level_pending || pending < vgic_cpu->nr_lr)
+	pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr);
+	if (level_pending || pending < vgic.nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }
 
@@ -1643,7 +1633,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
 	}
 
-	vgic_cpu->nr_lr = vgic_nr_lr;
+	/*
+	 * Store the number of LRs per vcpu, so we don't have to go
+	 * all the way to the distributor structure to find out. Only
+	 * assembly code should use this one.
+	 */
+	vgic_cpu->nr_lr = vgic.nr_lr;
 
 	vgic_enable(vcpu);
 
@@ -1652,7 +1647,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void vgic_init_maintenance_interrupt(void *info)
 {
-	enable_percpu_irq(vgic_maint_irq, 0);
+	enable_percpu_irq(vgic.maint_irq, 0);
 }
 
 static int vgic_cpu_notify(struct notifier_block *self,
@@ -1665,7 +1660,7 @@ static int vgic_cpu_notify(struct notifier_block *self,
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic_maint_irq);
+		disable_percpu_irq(vgic.maint_irq);
 		break;
 	}
 
@@ -1681,6 +1676,7 @@ int kvm_vgic_hyp_init(void)
 	int ret;
 	struct resource vctrl_res;
 	struct resource vcpu_res;
+	struct device_node *vgic_node;
 
 	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
 	if (!vgic_node) {
@@ -1688,17 +1684,17 @@ int kvm_vgic_hyp_init(void)
 		return -ENODEV;
 	}
 
-	vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic_maint_irq) {
+	vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0);
+	if (!vgic.maint_irq) {
 		kvm_err("error getting vgic maintenance irq from DT\n");
 		ret = -ENXIO;
 		goto out;
 	}
 
-	ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
+	ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
 	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
+		kvm_err("Cannot register interrupt %d\n", vgic.maint_irq);
 		goto out;
 	}
 
@@ -1714,18 +1710,18 @@ int kvm_vgic_hyp_init(void)
 		goto out_free_irq;
 	}
 
-	vgic_vctrl_base = of_iomap(vgic_node, 2);
-	if (!vgic_vctrl_base) {
+	vgic.vctrl_base = of_iomap(vgic_node, 2);
+	if (!vgic.vctrl_base) {
 		kvm_err("Cannot ioremap VCTRL\n");
 		ret = -ENOMEM;
 		goto out_free_irq;
 	}
 
-	vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
-	vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
+	vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR);
+	vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1;
 
-	ret = create_hyp_io_mappings(vgic_vctrl_base,
-				     vgic_vctrl_base + resource_size(&vctrl_res),
+	ret = create_hyp_io_mappings(vgic.vctrl_base,
+				     vgic.vctrl_base + resource_size(&vctrl_res),
 				     vctrl_res.start);
 	if (ret) {
 		kvm_err("Cannot map VCTRL into hyp\n");
@@ -1733,7 +1729,7 @@ int kvm_vgic_hyp_init(void)
 	}
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vctrl_res.start, vgic_maint_irq);
+		 vctrl_res.start, vgic.maint_irq);
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 
 	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
@@ -1741,14 +1737,14 @@ int kvm_vgic_hyp_init(void)
 		ret = -ENXIO;
 		goto out_unmap;
 	}
-	vgic_vcpu_base = vcpu_res.start;
+	vgic.vcpu_base = vcpu_res.start;
 
 	goto out;
 
 out_unmap:
-	iounmap(vgic_vctrl_base);
+	iounmap(vgic.vctrl_base);
 out_free_irq:
-	free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
+	free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus());
 out:
 	of_node_put(vgic_node);
 	return ret;
@@ -1783,7 +1779,7 @@ int kvm_vgic_init(struct kvm *kvm)
 	}
 
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+				    vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE);
 	if (ret) {
 		kvm_err("Unable to remap VGIC CPU to VCPU\n");
 		goto out;
@@ -1829,7 +1825,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	}
 
 	spin_lock_init(&kvm->arch.vgic.lock);
-	kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
+	kvm->arch.vgic.vctrl_base = vgic.vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 

From ab8bb40339927ab4e5875330461a9a9ffaf42a56 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 4 Feb 2014 18:13:03 +0000
Subject: [PATCH 196/287] KVM: ARM: vgic: split GICv2 backend from the main
 vgic code

Brutally hack the innocent vgic code, and move the GICv2 specific code
to its own file, using vgic_ops and vgic_params as a way to pass
information between the two blocks.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 8f186d522c69bb18dd9b93a634da4953228c67d4)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Makefile   |   1 +
 arch/arm64/kvm/Makefile |   2 +-
 include/kvm/arm_vgic.h  |  11 +-
 virt/kvm/arm/vgic-v2.c  | 248 +++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic.c     | 267 +++++++---------------------------------
 5 files changed, 304 insertions(+), 225 deletions(-)
 create mode 100644 virt/kvm/arm/vgic-v2.c

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 789bca9e64a7..f7057ed045b6 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -21,4 +21,5 @@ obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
 obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 72a9fd583ad3..7e92952d139e 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,5 +19,5 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ce2e14226dbf..d8d52a9ca6a1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,7 +32,8 @@
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
 #define VGIC_NR_SHARED_IRQS	(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
 #define VGIC_MAX_CPUS		KVM_MAX_VCPUS
-#define VGIC_MAX_LRS		(1 << 6)
+
+#define VGIC_V2_MAX_LRS		(1 << 6)
 
 /* Sanity checks... */
 #if (VGIC_MAX_CPUS > 8)
@@ -162,7 +163,7 @@ struct vgic_v2_cpu_if {
 	u32		vgic_eisr[2];	/* Saved only */
 	u32		vgic_elrsr[2];	/* Saved only */
 	u32		vgic_apr;
-	u32		vgic_lr[VGIC_MAX_LRS];
+	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
 
 struct vgic_cpu {
@@ -175,7 +176,7 @@ struct vgic_cpu {
 	DECLARE_BITMAP(	pending_shared, VGIC_NR_SHARED_IRQS);
 
 	/* Bitmap of used/free list registers */
-	DECLARE_BITMAP(	lr_used, VGIC_MAX_LRS);
+	DECLARE_BITMAP(	lr_used, VGIC_V2_MAX_LRS);
 
 	/* Number of list registers on this CPU */
 	int		nr_lr;
@@ -214,6 +215,10 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
 #define vgic_initialized(k)	((k)->arch.vgic.ready)
 
+int vgic_v2_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params);
+
 #else
 static inline int kvm_vgic_hyp_init(void)
 {
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
new file mode 100644
index 000000000000..940418ebd0d0
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	struct vgic_lr lr_desc;
+	u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+	lr_desc.irq	= val & GICH_LR_VIRTUALID;
+	if (lr_desc.irq <= 15)
+		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+	else
+		lr_desc.source = 0;
+	lr_desc.state	= 0;
+
+	if (val & GICH_LR_PENDING_BIT)
+		lr_desc.state |= LR_STATE_PENDING;
+	if (val & GICH_LR_ACTIVE_BIT)
+		lr_desc.state |= LR_STATE_ACTIVE;
+	if (val & GICH_LR_EOI)
+		lr_desc.state |= LR_EOI_INT;
+
+	return lr_desc;
+}
+
+static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+			   struct vgic_lr lr_desc)
+{
+	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+
+	if (lr_desc.state & LR_STATE_PENDING)
+		lr_val |= GICH_LR_PENDING_BIT;
+	if (lr_desc.state & LR_STATE_ACTIVE)
+		lr_val |= GICH_LR_ACTIVE_BIT;
+	if (lr_desc.state & LR_EOI_INT)
+		lr_val |= GICH_LR_EOI;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+}
+
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+				  struct vgic_lr lr_desc)
+{
+	if (!(lr_desc.state & LR_STATE_MASK))
+		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+#endif
+	return val;
+}
+
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+#if BITS_PER_LONG == 64
+	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+	val <<= 32;
+	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+#else
+	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+#endif
+	return val;
+}
+
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+	u32 ret = 0;
+
+	if (misr & GICH_MISR_EOI)
+		ret |= INT_STATUS_EOI;
+	if (misr & GICH_MISR_U)
+		ret |= INT_STATUS_UNDERFLOW;
+
+	return ret;
+}
+
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr;
+
+	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * By forcing VMCR to zero, the GIC will restore the binary
+	 * points to their reset values. Anything else resets to zero
+	 * anyway.
+	 */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+	/* Get the show on the road... */
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v2_ops = {
+	.get_lr			= vgic_v2_get_lr,
+	.set_lr			= vgic_v2_set_lr,
+	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
+	.get_elrsr		= vgic_v2_get_elrsr,
+	.get_eisr		= vgic_v2_get_eisr,
+	.get_interrupt_status	= vgic_v2_get_interrupt_status,
+	.enable_underflow	= vgic_v2_enable_underflow,
+	.disable_underflow	= vgic_v2_disable_underflow,
+	.get_vmcr		= vgic_v2_get_vmcr,
+	.set_vmcr		= vgic_v2_set_vmcr,
+	.enable			= vgic_v2_enable,
+};
+
+static struct vgic_params vgic_v2_params;
+
+/**
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+ * @node:	pointer to the DT node
+ * @ops: 	address of a pointer to the GICv2 operations
+ * @params:	address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv2 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v2_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params)
+{
+	int ret;
+	struct resource vctrl_res;
+	struct resource vcpu_res;
+	struct vgic_params *vgic = &vgic_v2_params;
+
+	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+	if (!vgic->maint_irq) {
+		kvm_err("error getting vgic maintenance irq from DT\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
+	if (ret) {
+		kvm_err("Cannot obtain GICH resource\n");
+		goto out;
+	}
+
+	vgic->vctrl_base = of_iomap(vgic_node, 2);
+	if (!vgic->vctrl_base) {
+		kvm_err("Cannot ioremap GICH\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+	vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+
+	ret = create_hyp_io_mappings(vgic->vctrl_base,
+				     vgic->vctrl_base + resource_size(&vctrl_res),
+				     vctrl_res.start);
+	if (ret) {
+		kvm_err("Cannot map VCTRL into hyp\n");
+		goto out_unmap;
+	}
+
+	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+		kvm_err("Cannot obtain GICV resource\n");
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+	vgic->vcpu_base = vcpu_res.start;
+
+	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+		 vctrl_res.start, vgic->maint_irq);
+
+	*ops = &vgic_v2_ops;
+	*params = vgic;
+	goto out;
+
+out_unmap:
+	iounmap(vgic->vctrl_base);
+out:
+	of_node_put(vgic_node);
+	return ret;
+}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index f3a996d0a100..e4b9cbbbee4c 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -95,7 +95,8 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 
-static struct vgic_params vgic;
+static const struct vgic_ops *vgic_ops;
+static const struct vgic_params *vgic;
 
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
@@ -971,205 +972,61 @@ static void vgic_update_state(struct kvm *kvm)
 	}
 }
 
-static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
-{
-	struct vgic_lr lr_desc;
-	u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
-
-	lr_desc.irq	= val & GICH_LR_VIRTUALID;
-	if (lr_desc.irq <= 15)
-		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
-	else
-		lr_desc.source = 0;
-	lr_desc.state	= 0;
-
-	if (val & GICH_LR_PENDING_BIT)
-		lr_desc.state |= LR_STATE_PENDING;
-	if (val & GICH_LR_ACTIVE_BIT)
-		lr_desc.state |= LR_STATE_ACTIVE;
-	if (val & GICH_LR_EOI)
-		lr_desc.state |= LR_EOI_INT;
-
-	return lr_desc;
-}
-
-static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
-			   struct vgic_lr lr_desc)
-{
-	u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
-
-	if (lr_desc.state & LR_STATE_PENDING)
-		lr_val |= GICH_LR_PENDING_BIT;
-	if (lr_desc.state & LR_STATE_ACTIVE)
-		lr_val |= GICH_LR_ACTIVE_BIT;
-	if (lr_desc.state & LR_EOI_INT)
-		lr_val |= GICH_LR_EOI;
-
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
-}
-
-static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-				  struct vgic_lr lr_desc)
-{
-	if (!(lr_desc.state & LR_STATE_MASK))
-		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
-}
-
-static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
-{
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
-#endif
-	return val;
-}
-
-static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
-{
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
-#endif
-	return val;
-}
-
-static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
-{
-	u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
-	u32 ret = 0;
-
-	if (misr & GICH_MISR_EOI)
-		ret |= INT_STATUS_EOI;
-	if (misr & GICH_MISR_U)
-		ret |= INT_STATUS_UNDERFLOW;
-
-	return ret;
-}
-
-static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
-}
-
-static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
-}
-
-static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
-	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
-
-	vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
-	vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
-	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
-	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
-}
-
-static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
-{
-	u32 vmcr;
-
-	vmcr  = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
-	vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
-	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
-	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
-
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
-}
-
-static void vgic_v2_enable(struct kvm_vcpu *vcpu)
-{
-	/*
-	 * By forcing VMCR to zero, the GIC will restore the binary
-	 * points to their reset values. Anything else resets to zero
-	 * anyway.
-	 */
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
-
-	/* Get the show on the road... */
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
-}
-
-static const struct vgic_ops vgic_ops = {
-	.get_lr			= vgic_v2_get_lr,
-	.set_lr			= vgic_v2_set_lr,
-	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
-	.get_elrsr		= vgic_v2_get_elrsr,
-	.get_eisr		= vgic_v2_get_eisr,
-	.get_interrupt_status	= vgic_v2_get_interrupt_status,
-	.enable_underflow	= vgic_v2_enable_underflow,
-	.disable_underflow	= vgic_v2_disable_underflow,
-	.get_vmcr		= vgic_v2_get_vmcr,
-	.set_vmcr		= vgic_v2_set_vmcr,
-	.enable			= vgic_v2_enable,
-};
-
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
 {
-	return vgic_ops.get_lr(vcpu, lr);
+	return vgic_ops->get_lr(vcpu, lr);
 }
 
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 			       struct vgic_lr vlr)
 {
-	vgic_ops.set_lr(vcpu, lr, vlr);
+	vgic_ops->set_lr(vcpu, lr, vlr);
 }
 
 static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 			       struct vgic_lr vlr)
 {
-	vgic_ops.sync_lr_elrsr(vcpu, lr, vlr);
+	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
 }
 
 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 {
-	return vgic_ops.get_elrsr(vcpu);
+	return vgic_ops->get_elrsr(vcpu);
 }
 
 static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
 {
-	return vgic_ops.get_eisr(vcpu);
+	return vgic_ops->get_eisr(vcpu);
 }
 
 static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
 {
-	return vgic_ops.get_interrupt_status(vcpu);
+	return vgic_ops->get_interrupt_status(vcpu);
 }
 
 static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
 {
-	vgic_ops.enable_underflow(vcpu);
+	vgic_ops->enable_underflow(vcpu);
 }
 
 static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
 {
-	vgic_ops.disable_underflow(vcpu);
+	vgic_ops->disable_underflow(vcpu);
 }
 
 static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
-	vgic_ops.get_vmcr(vcpu, vmcr);
+	vgic_ops->get_vmcr(vcpu, vmcr);
 }
 
 static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
 {
-	vgic_ops.set_vmcr(vcpu, vmcr);
+	vgic_ops->set_vmcr(vcpu, vmcr);
 }
 
 static inline void vgic_enable(struct kvm_vcpu *vcpu)
 {
-	vgic_ops.enable(vcpu);
+	vgic_ops->enable(vcpu);
 }
 
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
@@ -1197,7 +1054,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	int lr;
 
-	for_each_set_bit(lr, vgic_cpu->lr_used, vgic.nr_lr) {
+	for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
@@ -1241,8 +1098,8 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	/* Try to use another LR for this interrupt */
 	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic.nr_lr);
-	if (lr >= vgic.nr_lr)
+			       vgic->nr_lr);
+	if (lr >= vgic->nr_lr)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
@@ -1382,7 +1239,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		unsigned long *eisr_ptr = (unsigned long *)&eisr;
 		int lr;
 
-		for_each_set_bit(lr, eisr_ptr, vgic.nr_lr) {
+		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 			vgic_irq_clear_active(vcpu, vlr.irq);
@@ -1430,7 +1287,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	elrsr_ptr = (unsigned long *)&elrsr;
 
 	/* Clear mappings for empty LRs */
-	for_each_set_bit(lr, elrsr_ptr, vgic.nr_lr) {
+	for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		struct vgic_lr vlr;
 
 		if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
@@ -1443,8 +1300,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* Check if we still have something up our sleeve... */
-	pending = find_first_zero_bit(elrsr_ptr, vgic.nr_lr);
-	if (level_pending || pending < vgic.nr_lr)
+	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
+	if (level_pending || pending < vgic->nr_lr)
 		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
 }
 
@@ -1638,7 +1495,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 * all the way to the distributor structure to find out. Only
 	 * assembly code should use this one.
 	 */
-	vgic_cpu->nr_lr = vgic.nr_lr;
+	vgic_cpu->nr_lr = vgic->nr_lr;
 
 	vgic_enable(vcpu);
 
@@ -1647,7 +1504,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 
 static void vgic_init_maintenance_interrupt(void *info)
 {
-	enable_percpu_irq(vgic.maint_irq, 0);
+	enable_percpu_irq(vgic->maint_irq, 0);
 }
 
 static int vgic_cpu_notify(struct notifier_block *self,
@@ -1660,7 +1517,7 @@ static int vgic_cpu_notify(struct notifier_block *self,
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic.maint_irq);
+		disable_percpu_irq(vgic->maint_irq);
 		break;
 	}
 
@@ -1671,31 +1528,36 @@ static struct notifier_block vgic_cpu_nb = {
 	.notifier_call = vgic_cpu_notify,
 };
 
+static const struct of_device_id vgic_ids[] = {
+	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{},
+};
+
 int kvm_vgic_hyp_init(void)
 {
-	int ret;
-	struct resource vctrl_res;
-	struct resource vcpu_res;
+	const struct of_device_id *matched_id;
+	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+			  const struct vgic_params **);
 	struct device_node *vgic_node;
+	int ret;
 
-	vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
+	vgic_node = of_find_matching_node_and_match(NULL,
+						    vgic_ids, &matched_id);
 	if (!vgic_node) {
-		kvm_err("error: no compatible vgic node in DT\n");
+		kvm_err("error: no compatible GIC node found\n");
 		return -ENODEV;
 	}
 
-	vgic.maint_irq = irq_of_parse_and_map(vgic_node, 0);
-	if (!vgic.maint_irq) {
-		kvm_err("error getting vgic maintenance irq from DT\n");
-		ret = -ENXIO;
-		goto out;
-	}
+	vgic_probe = matched_id->data;
+	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+	if (ret)
+		return ret;
 
-	ret = request_percpu_irq(vgic.maint_irq, vgic_maintenance_handler,
+	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
 	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic.maint_irq);
-		goto out;
+		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+		return ret;
 	}
 
 	ret = __register_cpu_notifier(&vgic_cpu_nb);
@@ -1704,49 +1566,12 @@ int kvm_vgic_hyp_init(void)
 		goto out_free_irq;
 	}
 
-	ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
-	if (ret) {
-		kvm_err("Cannot obtain VCTRL resource\n");
-		goto out_free_irq;
-	}
-
-	vgic.vctrl_base = of_iomap(vgic_node, 2);
-	if (!vgic.vctrl_base) {
-		kvm_err("Cannot ioremap VCTRL\n");
-		ret = -ENOMEM;
-		goto out_free_irq;
-	}
-
-	vgic.nr_lr = readl_relaxed(vgic.vctrl_base + GICH_VTR);
-	vgic.nr_lr = (vgic.nr_lr & 0x3f) + 1;
-
-	ret = create_hyp_io_mappings(vgic.vctrl_base,
-				     vgic.vctrl_base + resource_size(&vctrl_res),
-				     vctrl_res.start);
-	if (ret) {
-		kvm_err("Cannot map VCTRL into hyp\n");
-		goto out_unmap;
-	}
-
-	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
-		 vctrl_res.start, vgic.maint_irq);
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 
-	if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
-		kvm_err("Cannot obtain VCPU resource\n");
-		ret = -ENXIO;
-		goto out_unmap;
-	}
-	vgic.vcpu_base = vcpu_res.start;
+	return 0;
 
-	goto out;
-
-out_unmap:
-	iounmap(vgic.vctrl_base);
 out_free_irq:
-	free_percpu_irq(vgic.maint_irq, kvm_get_running_vcpus());
-out:
-	of_node_put(vgic_node);
+	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
 	return ret;
 }
 
@@ -1779,7 +1604,7 @@ int kvm_vgic_init(struct kvm *kvm)
 	}
 
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
-				    vgic.vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
 	if (ret) {
 		kvm_err("Unable to remap VGIC CPU to VCPU\n");
 		goto out;
@@ -1825,7 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	}
 
 	spin_lock_init(&kvm->arch.vgic.lock);
-	kvm->arch.vgic.vctrl_base = vgic.vctrl_base;
+	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 

From 79f17b6074e34191952a9249b061012a23f3888a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 15 May 2014 10:03:25 +0100
Subject: [PATCH 197/287] KVM: ARM: vgic: revisit implementation of
 irqchip_in_kernel

So far, irqchip_in_kernel() was implemented by testing the value of
vctrl_base, which worked fine with GICv2.

With GICv3, this field is useless, as we're using system registers
instead of a emmory mapped interface. To solve this, add a boolean
flag indicating if the we're using a vgic or not.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f982cf4e9c37b19478c7bc6e0484a43a7e78cf57)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 3 ++-
 virt/kvm/arm/vgic.c    | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d8d52a9ca6a1..f6b9fec6fcac 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -117,6 +117,7 @@ struct vgic_params {
 struct vgic_dist {
 #ifdef CONFIG_KVM_ARM_VGIC
 	spinlock_t		lock;
+	bool			in_kernel;
 	bool			ready;
 
 	/* Virtual control interface mapping */
@@ -212,7 +213,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		      struct kvm_exit_mmio *mmio);
 
-#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
+#define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	((k)->arch.vgic.ready)
 
 int vgic_v2_probe(struct device_node *vgic_node,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e4b9cbbbee4c..1348e74c4d11 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1650,6 +1650,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	}
 
 	spin_lock_init(&kvm->arch.vgic.lock);
+	kvm->arch.vgic.in_kernel = true;
 	kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;

From cab0beb93ff52f5a47cf9a1a03c9c73d473d28d6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 26 Jun 2013 15:16:40 +0100
Subject: [PATCH 198/287] arm64: KVM: remove __kvm_hyp_code_{start,end} from
 hyp.S

We already have __hyp_text_{start,end} to express the boundaries
of the HYP text section, and __kvm_hyp_code_{start,end} are getting
in the way of a more modular world switch code.

Just turn __kvm_hyp_code_{start,end} into #defines mapping the
linker-emited symbols.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 45451914c875bba44903ce4f1445e047b7992bf7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h | 6 ++++--
 arch/arm64/include/asm/virt.h    | 4 ++++
 arch/arm64/kvm/hyp.S             | 6 ------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 9fcd54b1e16d..d0bfc4ba82c0 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -18,6 +18,8 @@
 #ifndef __ARM_KVM_ASM_H__
 #define __ARM_KVM_ASM_H__
 
+#include <asm/virt.h>
+
 /*
  * 0 is reserved as an invalid value.
  * Order *must* be kept in sync with the hyp switch code.
@@ -96,8 +98,8 @@ extern char __kvm_hyp_init_end[];
 
 extern char __kvm_hyp_vector[];
 
-extern char __kvm_hyp_code_start[];
-extern char __kvm_hyp_code_end[];
+#define	__kvm_hyp_code_start	__hyp_text_start
+#define	__kvm_hyp_code_end	__hyp_text_end
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 215ad4649dd7..7a5df5252dd7 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void)
 	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 877d82a134bc..9c5d0acb3654 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -36,9 +36,6 @@
 	.pushsection	.hyp.text, "ax"
 	.align	PAGE_SHIFT
 
-__kvm_hyp_code_start:
-	.globl __kvm_hyp_code_start
-
 .macro save_common_regs
 	// x2: base address for cpu context
 	// x3: tmp register
@@ -880,7 +877,4 @@ ENTRY(__kvm_hyp_vector)
 	ventry	el1_error_invalid		// Error 32-bit EL1
 ENDPROC(__kvm_hyp_vector)
 
-__kvm_hyp_code_end:
-	.globl	__kvm_hyp_code_end
-
 	.popsection

From fd17281069b65b1672718613d8c89cc9a409bfcd Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 21 Jun 2013 11:57:56 +0100
Subject: [PATCH 199/287] arm64: KVM: split GICv2 world switch from hyp code

Move the GICv2 world switch code into its own file, and add the
necessary indirection to the arm64 switch code.

Also introduce a new type field to the vgic_params structure.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1a9b13056dde7e3092304d6041ccc60a913042ea)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |   5 ++
 arch/arm64/include/asm/kvm_asm.h  |   4 +
 arch/arm64/include/asm/kvm_host.h |  21 +++++
 arch/arm64/kernel/asm-offsets.c   |   3 +
 arch/arm64/kvm/Makefile           |   4 +-
 arch/arm64/kvm/hyp.S              | 104 ++++-------------------
 arch/arm64/kvm/vgic-v2-switch.S   | 133 ++++++++++++++++++++++++++++++
 include/kvm/arm_vgic.h            |   7 +-
 virt/kvm/arm/vgic-v2.c            |   1 +
 virt/kvm/arm/vgic.c               |   3 +
 10 files changed, 195 insertions(+), 90 deletions(-)
 create mode 100644 arch/arm64/kvm/vgic-v2-switch.S

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dc4e3edf39cc..6dfb404f6c46 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,6 +225,11 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 	return 0;
 }
 
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	BUG_ON(vgic->type != VGIC_V2);
+}
+
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index d0bfc4ba82c0..6252264341c8 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -105,6 +105,10 @@ extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+
 #endif
 
 #endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 92242ce06309..4c182d0aae70 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -200,4 +200,25 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 		     hyp_stack_ptr, vector_ptr);
 }
 
+struct vgic_sr_vectors {
+	void	*save_vgic;
+	void	*restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+	extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+	switch(vgic->type)
+	{
+	case VGIC_V2:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v2_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v2_state;
+		break;
+
+	default:
+		BUG();
+	}
+}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index dcfd8a616a94..9ff0b2b97392 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -127,6 +127,9 @@ int main(void)
   DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
   DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
   DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
+  DEFINE(VGIC_SAVE_FN,		offsetof(struct vgic_sr_vectors, save_vgic));
+  DEFINE(VGIC_RESTORE_FN,	offsetof(struct vgic_sr_vectors, restore_vgic));
+  DEFINE(VGIC_SR_VECTOR_SZ,	sizeof(struct vgic_sr_vectors));
   DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
   DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 7e92952d139e..daf24dc59e2c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -19,5 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 9c5d0acb3654..56df9a352a81 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -16,7 +16,6 @@
  */
 
 #include <linux/linkage.h>
-#include <linux/irqchip/arm-gic.h>
 
 #include <asm/assembler.h>
 #include <asm/memory.h>
@@ -376,100 +375,23 @@
 .endm
 
 /*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
+ * Call into the vgic backend for state saving
  */
 .macro save_vgic_state
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* Save all interesting registers */
-	ldr	w4, [x2, #GICH_HCR]
-	ldr	w5, [x2, #GICH_VMCR]
-	ldr	w6, [x2, #GICH_MISR]
-	ldr	w7, [x2, #GICH_EISR0]
-	ldr	w8, [x2, #GICH_EISR1]
-	ldr	w9, [x2, #GICH_ELRSR0]
-	ldr	w10, [x2, #GICH_ELRSR1]
-	ldr	w11, [x2, #GICH_APR]
-CPU_BE(	rev	w4,  w4  )
-CPU_BE(	rev	w5,  w5  )
-CPU_BE(	rev	w6,  w6  )
-CPU_BE(	rev	w7,  w7  )
-CPU_BE(	rev	w8,  w8  )
-CPU_BE(	rev	w9,  w9  )
-CPU_BE(	rev	w10, w10 )
-CPU_BE(	rev	w11, w11 )
-
-	str	w4, [x3, #VGIC_V2_CPU_HCR]
-	str	w5, [x3, #VGIC_V2_CPU_VMCR]
-	str	w6, [x3, #VGIC_V2_CPU_MISR]
-	str	w7, [x3, #VGIC_V2_CPU_EISR]
-	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
-	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
-	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
-	str	w11, [x3, #VGIC_V2_CPU_APR]
-
-	/* Clear GICH_HCR */
-	str	wzr, [x2, #GICH_HCR]
-
-	/* Save list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x2], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x3], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, VGIC_SAVE_FN]
+	kern_hyp_va	x24
+	blr	x24
 .endm
 
 /*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
+ * Call into the vgic backend for state restoring
  */
 .macro restore_vgic_state
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* We only restore a minimal set of registers */
-	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
-	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
-	ldr	w6, [x3, #VGIC_V2_CPU_APR]
-CPU_BE(	rev	w4, w4 )
-CPU_BE(	rev	w5, w5 )
-CPU_BE(	rev	w6, w6 )
-
-	str	w4, [x2, #GICH_HCR]
-	str	w5, [x2, #GICH_VMCR]
-	str	w6, [x2, #GICH_APR]
-
-	/* Restore list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x3], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x2], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
+	adr	x24, __vgic_sr_vectors
+	ldr	x24, [x24, #VGIC_RESTORE_FN]
+	kern_hyp_va	x24
+	blr	x24
 .endm
 
 .macro save_timer_state
@@ -650,6 +572,12 @@ ENTRY(__kvm_flush_vm_context)
 	ret
 ENDPROC(__kvm_flush_vm_context)
 
+	// struct vgic_sr_vectors __vgi_sr_vectors;
+	.align 3
+ENTRY(__vgic_sr_vectors)
+	.skip	VGIC_SR_VECTOR_SZ
+ENDPROC(__vgic_sr_vectors)
+
 __kvm_hyp_panic:
 	// Guess the context by looking at VTTBR:
 	// If zero, then we're already a host.
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
new file mode 100644
index 000000000000..ae211772f991
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+ENTRY(__save_vgic_v2_state)
+__save_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* Save all interesting registers */
+	ldr	w4, [x2, #GICH_HCR]
+	ldr	w5, [x2, #GICH_VMCR]
+	ldr	w6, [x2, #GICH_MISR]
+	ldr	w7, [x2, #GICH_EISR0]
+	ldr	w8, [x2, #GICH_EISR1]
+	ldr	w9, [x2, #GICH_ELRSR0]
+	ldr	w10, [x2, #GICH_ELRSR1]
+	ldr	w11, [x2, #GICH_APR]
+CPU_BE(	rev	w4,  w4  )
+CPU_BE(	rev	w5,  w5  )
+CPU_BE(	rev	w6,  w6  )
+CPU_BE(	rev	w7,  w7  )
+CPU_BE(	rev	w8,  w8  )
+CPU_BE(	rev	w9,  w9  )
+CPU_BE(	rev	w10, w10 )
+CPU_BE(	rev	w11, w11 )
+
+	str	w4, [x3, #VGIC_V2_CPU_HCR]
+	str	w5, [x3, #VGIC_V2_CPU_VMCR]
+	str	w6, [x3, #VGIC_V2_CPU_MISR]
+	str	w7, [x3, #VGIC_V2_CPU_EISR]
+	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
+	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	w11, [x3, #VGIC_V2_CPU_APR]
+
+	/* Clear GICH_HCR */
+	str	wzr, [x2, #GICH_HCR]
+
+	/* Save list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x2], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x3], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__save_vgic_v2_state)
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+ENTRY(__restore_vgic_v2_state)
+__restore_vgic_v2_state:
+	/* Get VGIC VCTRL base into x2 */
+	ldr	x2, [x0, #VCPU_KVM]
+	kern_hyp_va	x2
+	ldr	x2, [x2, #KVM_VGIC_VCTRL]
+	kern_hyp_va	x2
+	cbz	x2, 2f		// disabled
+
+	/* Compute the address of struct vgic_cpu */
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	/* We only restore a minimal set of registers */
+	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
+	ldr	w6, [x3, #VGIC_V2_CPU_APR]
+CPU_BE(	rev	w4, w4 )
+CPU_BE(	rev	w5, w5 )
+CPU_BE(	rev	w6, w6 )
+
+	str	w4, [x2, #GICH_HCR]
+	str	w5, [x2, #GICH_VMCR]
+	str	w6, [x2, #GICH_APR]
+
+	/* Restore list registers */
+	add	x2, x2, #GICH_LR0
+	ldr	w4, [x3, #VGIC_CPU_NR_LR]
+	add	x3, x3, #VGIC_V2_CPU_LR
+1:	ldr	w5, [x3], #4
+CPU_BE(	rev	w5, w5 )
+	str	w5, [x2], #4
+	sub	w4, w4, #1
+	cbnz	w4, 1b
+2:
+	ret
+ENDPROC(__restore_vgic_v2_state)
+
+	.popsection
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f6b9fec6fcac..65f1121a3beb 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -24,7 +24,6 @@
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
-#include <linux/irqchip/arm-gic.h>
 
 #define VGIC_NR_IRQS		256
 #define VGIC_NR_SGIS		16
@@ -71,6 +70,10 @@ struct vgic_bytemap {
 
 struct kvm_vcpu;
 
+enum vgic_type {
+	VGIC_V2,		/* Good ol' GICv2 */
+};
+
 #define LR_STATE_PENDING	(1 << 0)
 #define LR_STATE_ACTIVE		(1 << 1)
 #define LR_STATE_MASK		(3 << 0)
@@ -104,6 +107,8 @@ struct vgic_ops {
 };
 
 struct vgic_params {
+	/* vgic type */
+	enum vgic_type	type;
 	/* Physical address of vgic virtual cpu interface */
 	phys_addr_t	vcpu_base;
 	/* Number of list registers */
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 940418ebd0d0..d6c9c142f813 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -236,6 +236,7 @@ int vgic_v2_probe(struct device_node *vgic_node,
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 		 vctrl_res.start, vgic->maint_irq);
 
+	vgic->type = VGIC_V2;
 	*ops = &vgic_v2_ops;
 	*params = vgic;
 	goto out;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 1348e74c4d11..7867b9a1f694 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1568,6 +1568,9 @@ int kvm_vgic_hyp_init(void)
 
 	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
 
+	/* Callback into for arch code for setup */
+	vgic_arch_setup(vgic);
+
 	return 0;
 
 out_free_irq:

From c292a45791af6d60cc7c644809f84810a38e1771 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 9 Aug 2013 18:19:11 +0100
Subject: [PATCH 200/287] arm64: KVM: move HCR_EL2.{IMO,FMO} manipulation into
 the vgic switch code

GICv3 requires the IMO and FMO bits to be tightly coupled with some
of the interrupt controller's register switch.

In order to have similar code paths, move the manipulation of these
bits to the GICv2 switch code.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ac3c3747e2db2f326ffc601651de544cdd33a8e9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h |  5 +++--
 arch/arm64/kvm/hyp.S             | 17 ++++++++++++-----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3d6903006a8a..cc83520459ed 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -76,9 +76,10 @@
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
-			 HCR_AMO | HCR_IMO | HCR_FMO | \
-			 HCR_SWIO | HCR_TIDCP | HCR_RW)
+			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
+
 
 /* Hyp System Control Register (SCTLR_EL2) bits */
 #define SCTLR_EL2_EE	(1 << 25)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 56df9a352a81..5945f3bdea7a 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -335,11 +335,8 @@
 .endm
 
 .macro activate_traps
-	ldr	x2, [x0, #VCPU_IRQ_LINES]
-	ldr	x1, [x0, #VCPU_HCR_EL2]
-	orr	x2, x2, x1
-	msr	hcr_el2, x2
-
+	ldr     x2, [x0, #VCPU_HCR_EL2]
+	msr     hcr_el2, x2
 	ldr	x2, =(CPTR_EL2_TTA)
 	msr	cptr_el2, x2
 
@@ -382,12 +379,22 @@
 	ldr	x24, [x24, VGIC_SAVE_FN]
 	kern_hyp_va	x24
 	blr	x24
+	mrs	x24, hcr_el2
+	mov	x25, #HCR_INT_OVERRIDE
+	neg	x25, x25
+	and	x24, x24, x25
+	msr	hcr_el2, x24
 .endm
 
 /*
  * Call into the vgic backend for state restoring
  */
 .macro restore_vgic_state
+	mrs	x24, hcr_el2
+	ldr	x25, [x0, #VCPU_IRQ_LINES]
+	orr	x24, x24, #HCR_INT_OVERRIDE
+	orr	x24, x24, x25
+	msr	hcr_el2, x24
 	adr	x24, __vgic_sr_vectors
 	ldr	x24, [x24, #VGIC_RESTORE_FN]
 	kern_hyp_va	x24

From 9a35d57d6984382050d493e2fa1140c9a6b78671 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 12 Jul 2013 15:15:23 +0100
Subject: [PATCH 201/287] KVM: ARM: vgic: add the GICv3 backend

Introduce the support code for emulating a GICv2 on top of GICv3
hardware.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b2fb1c0d378399e1427a91bb991c094f2ca09a2f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |   2 +
 arch/arm64/kvm/vgic-v3-switch.S  |  29 ++++
 include/kvm/arm_vgic.h           |  28 ++++
 virt/kvm/arm/vgic-v3.c           | 231 +++++++++++++++++++++++++++++++
 4 files changed, 290 insertions(+)
 create mode 100644 arch/arm64/kvm/vgic-v3-switch.S
 create mode 100644 virt/kvm/arm/vgic-v3.c

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 6252264341c8..ed4987bf9ac7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -106,6 +106,8 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
 extern char __save_vgic_v2_state[];
 extern char __restore_vgic_v2_state[];
 
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
new file mode 100644
index 000000000000..9fbf27350c84
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+	mrs	x0, ICH_VTR_EL2
+	ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+	.popsection
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 65f1121a3beb..35b0c121bb65 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -33,6 +33,7 @@
 #define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
+#define VGIC_V3_MAX_LRS		16
 
 /* Sanity checks... */
 #if (VGIC_MAX_CPUS > 8)
@@ -72,6 +73,7 @@ struct kvm_vcpu;
 
 enum vgic_type {
 	VGIC_V2,		/* Good ol' GICv2 */
+	VGIC_V3,		/* New fancy GICv3 */
 };
 
 #define LR_STATE_PENDING	(1 << 0)
@@ -172,6 +174,19 @@ struct vgic_v2_cpu_if {
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
 
+struct vgic_v3_cpu_if {
+#ifdef CONFIG_ARM_GIC_V3
+	u32		vgic_hcr;
+	u32		vgic_vmcr;
+	u32		vgic_misr;	/* Saved only */
+	u32		vgic_eisr;	/* Saved only */
+	u32		vgic_elrsr;	/* Saved only */
+	u32		vgic_ap0r[4];
+	u32		vgic_ap1r[4];
+	u64		vgic_lr[VGIC_V3_MAX_LRS];
+#endif
+};
+
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
@@ -190,6 +205,7 @@ struct vgic_cpu {
 	/* CPU vif control registers for world switch */
 	union {
 		struct vgic_v2_cpu_if	vgic_v2;
+		struct vgic_v3_cpu_if	vgic_v3;
 	};
 #endif
 };
@@ -224,6 +240,18 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int vgic_v2_probe(struct device_node *vgic_node,
 		  const struct vgic_ops **ops,
 		  const struct vgic_params **params);
+#ifdef CONFIG_ARM_GIC_V3
+int vgic_v3_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params);
+#else
+static inline int vgic_v3_probe(struct device_node *vgic_node,
+				const struct vgic_ops **ops,
+				const struct vgic_params **params)
+{
+	return -ENODEV;
+}
+#endif
 
 #else
 static inline int kvm_vgic_hyp_init(void)
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
new file mode 100644
index 000000000000..f01d44685720
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID		(0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT	(10)
+#define GICH_LR_PHYSID_CPUID		(7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+/*
+ * LRs are stored in reverse order in memory. make sure we index them
+ * correctly.
+ */
+#define LR_INDEX(lr)			(VGIC_V3_MAX_LRS - 1 - lr)
+
+static u32 ich_vtr_el2;
+
+static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+	struct vgic_lr lr_desc;
+	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
+
+	lr_desc.irq	= val & GICH_LR_VIRTUALID;
+	if (lr_desc.irq <= 15)
+		lr_desc.source	= (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+	else
+		lr_desc.source = 0;
+	lr_desc.state	= 0;
+
+	if (val & ICH_LR_PENDING_BIT)
+		lr_desc.state |= LR_STATE_PENDING;
+	if (val & ICH_LR_ACTIVE_BIT)
+		lr_desc.state |= LR_STATE_ACTIVE;
+	if (val & ICH_LR_EOI)
+		lr_desc.state |= LR_EOI_INT;
+
+	return lr_desc;
+}
+
+static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
+			   struct vgic_lr lr_desc)
+{
+	u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
+		      lr_desc.irq);
+
+	if (lr_desc.state & LR_STATE_PENDING)
+		lr_val |= ICH_LR_PENDING_BIT;
+	if (lr_desc.state & LR_STATE_ACTIVE)
+		lr_val |= ICH_LR_ACTIVE_BIT;
+	if (lr_desc.state & LR_EOI_INT)
+		lr_val |= ICH_LR_EOI;
+
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
+}
+
+static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+				  struct vgic_lr lr_desc)
+{
+	if (!(lr_desc.state & LR_STATE_MASK))
+		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+}
+
+static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr;
+}
+
+static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
+}
+
+static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+	u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
+	u32 ret = 0;
+
+	if (misr & ICH_MISR_EOI)
+		ret |= INT_STATUS_EOI;
+	if (misr & ICH_MISR_U)
+		ret |= INT_STATUS_UNDERFLOW;
+
+	return ret;
+}
+
+static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+
+	vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+	vmcrp->bpr  = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+	vmcrp->pmr  = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+}
+
+static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
+}
+
+static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE;
+}
+
+static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+	u32 vmcr;
+
+	vmcr  = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
+	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
+	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+}
+
+static void vgic_v3_enable(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * By forcing VMCR to zero, the GIC will restore the binary
+	 * points to their reset values. Anything else resets to zero
+	 * anyway.
+	 */
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
+
+	/* Get the show on the road... */
+	vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v3_ops = {
+	.get_lr			= vgic_v3_get_lr,
+	.set_lr			= vgic_v3_set_lr,
+	.sync_lr_elrsr		= vgic_v3_sync_lr_elrsr,
+	.get_elrsr		= vgic_v3_get_elrsr,
+	.get_eisr		= vgic_v3_get_eisr,
+	.get_interrupt_status	= vgic_v3_get_interrupt_status,
+	.enable_underflow	= vgic_v3_enable_underflow,
+	.disable_underflow	= vgic_v3_disable_underflow,
+	.get_vmcr		= vgic_v3_get_vmcr,
+	.set_vmcr		= vgic_v3_set_vmcr,
+	.enable			= vgic_v3_enable,
+};
+
+static struct vgic_params vgic_v3_params;
+
+/**
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
+ * @node:	pointer to the DT node
+ * @ops: 	address of a pointer to the GICv3 operations
+ * @params:	address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv3 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v3_probe(struct device_node *vgic_node,
+		  const struct vgic_ops **ops,
+		  const struct vgic_params **params)
+{
+	int ret = 0;
+	u32 gicv_idx;
+	struct resource vcpu_res;
+	struct vgic_params *vgic = &vgic_v3_params;
+
+	vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+	if (!vgic->maint_irq) {
+		kvm_err("error getting vgic maintenance irq from DT\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+
+	/*
+	 * The ListRegs field is 5 bits, but there is a architectural
+	 * maximum of 16 list registers. Just ignore bit 4...
+	 */
+	vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+	if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
+		gicv_idx = 1;
+
+	gicv_idx += 3; /* Also skip GICD, GICC, GICH */
+	if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+		kvm_err("Cannot obtain GICV region\n");
+		ret = -ENXIO;
+		goto out;
+	}
+	vgic->vcpu_base = vcpu_res.start;
+	vgic->vctrl_base = NULL;
+	vgic->type = VGIC_V3;
+
+	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+		 vcpu_res.start, vgic->maint_irq);
+
+	*ops = &vgic_v3_ops;
+	*params = vgic;
+
+out:
+	of_node_put(vgic_node);
+	return ret;
+}

From 825ae8cc53eeb522fb8c8bf9095fbee898cc1766 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 9 Jul 2013 10:45:49 +0100
Subject: [PATCH 202/287] arm64: KVM: vgic: add GICv3 world switch

Introduce the GICv3 world switch code used to save/restore the
GICv3 context.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 754d37726010d872f1f714a8ce8920acdfa4978c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |   2 +
 arch/arm64/kernel/asm-offsets.c  |   8 ++
 arch/arm64/kvm/vgic-v3-switch.S  | 238 +++++++++++++++++++++++++++++++
 3 files changed, 248 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ed4987bf9ac7..a28c35b337ec 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -110,6 +110,8 @@ extern u64 __vgic_v3_get_ich_vtr_el2(void);
 
 extern char __save_vgic_v2_state[];
 extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
 
 #endif
 
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9ff0b2b97392..65ebb2ccde5f 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -137,6 +137,14 @@ int main(void)
   DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
   DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+  DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+  DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+  DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+  DEFINE(VGIC_V3_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+  DEFINE(VGIC_V3_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+  DEFINE(VGIC_V3_CPU_AP0R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+  DEFINE(VGIC_V3_CPU_AP1R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+  DEFINE(VGIC_V3_CPU_LR,	offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
   DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 9fbf27350c84..21e68f606a8f 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -18,9 +18,247 @@
 #include <linux/linkage.h>
 #include <linux/irqchip/arm-gic-v3.h>
 
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
 	.text
 	.pushsection	.hyp.text, "ax"
 
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n)	(VGIC_V3_CPU_LR + (15 - n) * 8)
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro	save_vgic_v3_state
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Make sure stores to the GIC via the memory mapped interface
+	// are now visible to the system register interface
+	dsb	st
+
+	// Save all interesting registers
+	mrs	x4, ICH_HCR_EL2
+	mrs	x5, ICH_VMCR_EL2
+	mrs	x6, ICH_MISR_EL2
+	mrs	x7, ICH_EISR_EL2
+	mrs	x8, ICH_ELSR_EL2
+
+	str	w4, [x3, #VGIC_V3_CPU_HCR]
+	str	w5, [x3, #VGIC_V3_CPU_VMCR]
+	str	w6, [x3, #VGIC_V3_CPU_MISR]
+	str	w7, [x3, #VGIC_V3_CPU_EISR]
+	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+	msr	ICH_HCR_EL2, xzr
+
+	mrs	x21, ICH_VTR_EL2
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	mrs	x20, ICH_LR15_EL2
+	mrs	x19, ICH_LR14_EL2
+	mrs	x18, ICH_LR13_EL2
+	mrs	x17, ICH_LR12_EL2
+	mrs	x16, ICH_LR11_EL2
+	mrs	x15, ICH_LR10_EL2
+	mrs	x14, ICH_LR9_EL2
+	mrs	x13, ICH_LR8_EL2
+	mrs	x12, ICH_LR7_EL2
+	mrs	x11, ICH_LR6_EL2
+	mrs	x10, ICH_LR5_EL2
+	mrs	x9, ICH_LR4_EL2
+	mrs	x8, ICH_LR3_EL2
+	mrs	x7, ICH_LR2_EL2
+	mrs	x6, ICH_LR1_EL2
+	mrs	x5, ICH_LR0_EL2
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	str	x20, [x3, #LR_OFFSET(15)]
+	str	x19, [x3, #LR_OFFSET(14)]
+	str	x18, [x3, #LR_OFFSET(13)]
+	str	x17, [x3, #LR_OFFSET(12)]
+	str	x16, [x3, #LR_OFFSET(11)]
+	str	x15, [x3, #LR_OFFSET(10)]
+	str	x14, [x3, #LR_OFFSET(9)]
+	str	x13, [x3, #LR_OFFSET(8)]
+	str	x12, [x3, #LR_OFFSET(7)]
+	str	x11, [x3, #LR_OFFSET(6)]
+	str	x10, [x3, #LR_OFFSET(5)]
+	str	x9, [x3, #LR_OFFSET(4)]
+	str	x8, [x3, #LR_OFFSET(3)]
+	str	x7, [x3, #LR_OFFSET(2)]
+	str	x6, [x3, #LR_OFFSET(1)]
+	str	x5, [x3, #LR_OFFSET(0)]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs	x20, ICH_AP0R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	mrs	x19, ICH_AP0R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+6:	mrs	x18, ICH_AP0R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+5:	mrs	x17, ICH_AP0R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP0R]
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	mrs	x20, ICH_AP1R3_EL2
+	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	mrs	x19, ICH_AP1R2_EL2
+	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+6:	mrs	x18, ICH_AP1R1_EL2
+	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+5:	mrs	x17, ICH_AP1R0_EL2
+	str	w17, [x3, #VGIC_V3_CPU_AP1R]
+
+	// Restore SRE_EL1 access and re-enable SRE at EL1.
+	mrs	x5, ICC_SRE_EL2
+	orr	x5, x5, #ICC_SRE_EL2_ENABLE
+	msr	ICC_SRE_EL2, x5
+	isb
+	mov	x5, #1
+	msr	ICC_SRE_EL1, x5
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro	restore_vgic_v3_state
+	// Disable SRE_EL1 access. Necessary, otherwise
+	// ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
+	msr	ICC_SRE_EL1, xzr
+	isb
+
+	// Compute the address of struct vgic_cpu
+	add	x3, x0, #VCPU_VGIC_CPU
+
+	// Restore all interesting registers
+	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
+	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
+
+	msr	ICH_HCR_EL2, x4
+	msr	ICH_VMCR_EL2, x5
+
+	mrs	x21, ICH_VTR_EL2
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+	msr	ICH_AP1R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+	msr	ICH_AP1R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+	msr	ICH_AP1R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
+	msr	ICH_AP1R0_EL2, x17
+
+	tbnz	w21, #29, 6f	// 6 bits
+	tbz	w21, #30, 5f	// 5 bits
+				// 7 bits
+	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+	msr	ICH_AP0R3_EL2, x20
+	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+	msr	ICH_AP0R2_EL2, x19
+6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+	msr	ICH_AP0R1_EL2, x18
+5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
+	msr	ICH_AP0R0_EL2, x17
+
+	and	w22, w21, #0xf
+	mvn	w22, w21
+	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	ldr	x20, [x3, #LR_OFFSET(15)]
+	ldr	x19, [x3, #LR_OFFSET(14)]
+	ldr	x18, [x3, #LR_OFFSET(13)]
+	ldr	x17, [x3, #LR_OFFSET(12)]
+	ldr	x16, [x3, #LR_OFFSET(11)]
+	ldr	x15, [x3, #LR_OFFSET(10)]
+	ldr	x14, [x3, #LR_OFFSET(9)]
+	ldr	x13, [x3, #LR_OFFSET(8)]
+	ldr	x12, [x3, #LR_OFFSET(7)]
+	ldr	x11, [x3, #LR_OFFSET(6)]
+	ldr	x10, [x3, #LR_OFFSET(5)]
+	ldr	x9, [x3, #LR_OFFSET(4)]
+	ldr	x8, [x3, #LR_OFFSET(3)]
+	ldr	x7, [x3, #LR_OFFSET(2)]
+	ldr	x6, [x3, #LR_OFFSET(1)]
+	ldr	x5, [x3, #LR_OFFSET(0)]
+
+	adr	x24, 1f
+	add	x24, x24, x23
+	br	x24
+
+1:
+	msr	ICH_LR15_EL2, x20
+	msr	ICH_LR14_EL2, x19
+	msr	ICH_LR13_EL2, x18
+	msr	ICH_LR12_EL2, x17
+	msr	ICH_LR11_EL2, x16
+	msr	ICH_LR10_EL2, x15
+	msr	ICH_LR9_EL2,  x14
+	msr	ICH_LR8_EL2,  x13
+	msr	ICH_LR7_EL2,  x12
+	msr	ICH_LR6_EL2,  x11
+	msr	ICH_LR5_EL2,  x10
+	msr	ICH_LR4_EL2,   x9
+	msr	ICH_LR3_EL2,   x8
+	msr	ICH_LR2_EL2,   x7
+	msr	ICH_LR1_EL2,   x6
+	msr	ICH_LR0_EL2,   x5
+
+	// Ensure that the above will have reached the
+	// (re)distributors. This ensure the guest will read
+	// the correct values from the memory-mapped interface.
+	isb
+	dsb	sy
+
+	// Prevent the guest from touching the GIC system registers
+	mrs	x5, ICC_SRE_EL2
+	and	x5, x5, #~ICC_SRE_EL2_ENABLE
+	msr	ICC_SRE_EL2, x5
+.endm
+
+ENTRY(__save_vgic_v3_state)
+	save_vgic_v3_state
+	ret
+ENDPROC(__save_vgic_v3_state)
+
+ENTRY(__restore_vgic_v3_state)
+	restore_vgic_v3_state
+	ret
+ENDPROC(__restore_vgic_v3_state)
+
 ENTRY(__vgic_v3_get_ich_vtr_el2)
 	mrs	x0, ICH_VTR_EL2
 	ret

From 0e705de508452a4437225048231f8e7aa5f56d2c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 9 Jul 2013 10:45:49 +0100
Subject: [PATCH 203/287] arm64: KVM: vgic: enable GICv2 emulation on top on
 GICv3 hardware

Add the last missing bits that enable GICv2 emulation on top of
GICv3 hardware.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 67b2abfedb7b861bead93400fa315c5c30879d51)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 7 +++++++
 arch/arm64/kvm/Makefile           | 2 ++
 virt/kvm/arm/vgic.c               | 1 +
 3 files changed, 10 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4c182d0aae70..4ae9213aa997 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -216,6 +216,13 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v2_state;
 		break;
 
+#ifdef CONFIG_ARM_GIC_V3
+	case VGIC_V3:
+		__vgic_sr_vectors.save_vgic	= __save_vgic_v3_state;
+		__vgic_sr_vectors.restore_vgic	= __restore_vgic_v3_state;
+		break;
+#endif
+
 	default:
 		BUG();
 	}
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index daf24dc59e2c..32a096174b94 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -22,4 +22,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7867b9a1f694..795ab482333d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1530,6 +1530,7 @@ static struct notifier_block vgic_cpu_nb = {
 
 static const struct of_device_id vgic_ids[] = {
 	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
 	{},
 };
 

From ff815e50964d2593064d47c9dbd4a3bb03ea3e18 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:01 -0700
Subject: [PATCH 204/287] ARM: KVM: fix vgic V7 assembler code to work in BE
 image

The vgic h/w registers are little endian; when BE asm code
reads/writes from/to them, it needs to do byteswap after/before.
Byteswap code uses ARM_BE8 wrapper to add swap only if
CONFIG_CPU_BIG_ENDIAN is configured.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 64054c25cf7e060cd6780744fefe7ed3990e4f21)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts_head.S | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index e4eaf30205c5..68d99c69639c 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -1,4 +1,5 @@
 #include <linux/irqchip/arm-gic.h>
+#include <asm/assembler.h>
 
 #define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
 #define VCPU_USR_SP		(VCPU_USR_REG(13))
@@ -420,6 +421,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r8, [r2, #GICH_ELRSR0]
 	ldr	r9, [r2, #GICH_ELRSR1]
 	ldr	r10, [r2, #GICH_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r5, r5	)
+ARM_BE8(rev	r6, r6	)
+ARM_BE8(rev	r7, r7	)
+ARM_BE8(rev	r8, r8	)
+ARM_BE8(rev	r9, r9	)
+ARM_BE8(rev	r10, r10	)
 
 	str	r3, [r11, #VGIC_V2_CPU_HCR]
 	str	r4, [r11, #VGIC_V2_CPU_VMCR]
@@ -439,6 +448,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r2], #4
+ARM_BE8(rev	r6, r6	)
 	str	r6, [r3], #4
 	subs	r4, r4, #1
 	bne	1b
@@ -466,6 +476,9 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	ldr	r3, [r11, #VGIC_V2_CPU_HCR]
 	ldr	r4, [r11, #VGIC_V2_CPU_VMCR]
 	ldr	r8, [r11, #VGIC_V2_CPU_APR]
+ARM_BE8(rev	r3, r3	)
+ARM_BE8(rev	r4, r4	)
+ARM_BE8(rev	r8, r8	)
 
 	str	r3, [r2, #GICH_HCR]
 	str	r4, [r2, #GICH_VMCR]
@@ -476,6 +489,7 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 	add	r3, r11, #VGIC_V2_CPU_LR
 	ldr	r4, [r11, #VGIC_CPU_NR_LR]
 1:	ldr	r6, [r3], #4
+ARM_BE8(rev	r6, r6  )
 	str	r6, [r2], #4
 	subs	r4, r4, #1
 	bne	1b

From af39d18ecef850d57705c56cda31da0d855424ea Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:02 -0700
Subject: [PATCH 205/287] ARM: KVM: handle 64bit values passed to mrcc or from
 mcrr instructions in BE case

In some cases the mcrr and mrrc instructions in combination with the ldrd
and strd instructions need to deal with 64bit value in memory. The ldrd
and strd instructions already handle endianness within word (register)
boundaries but to get effect of the whole 64bit value represented correctly,
rr_lo_hi macro is introduced and is used to swap registers positions when
the mcrr and mrrc instructions are used. That has the effect of swapping
two words.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 19b0e60a63f758a28329aa40f4270a6c98c2dcb7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_asm.h | 18 ++++++++++++++++++
 arch/arm/kvm/init.S            |  4 ++--
 arch/arm/kvm/interrupts.S      |  4 ++--
 arch/arm/kvm/interrupts_head.S |  6 +++---
 4 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 53b3c4a50d5c..3a67bec72d0c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -61,6 +61,24 @@
 #define ARM_EXCEPTION_FIQ	  6
 #define ARM_EXCEPTION_HVC	  7
 
+/*
+ * The rr_lo_hi macro swaps a pair of registers depending on
+ * current endianness. It is used in conjunction with ldrd and strd
+ * instructions that load/store a 64-bit value from/to memory to/from
+ * a pair of registers which are used with the mrrc and mcrr instructions.
+ * If used with the ldrd/strd instructions, the a1 parameter is the first
+ * source/destination register and the a2 parameter is the second
+ * source/destination register. Note that the ldrd/strd instructions
+ * already swap the bytes within the words correctly according to the
+ * endianness setting, but the order of the registers need to be effectively
+ * swapped when used with the mrrc/mcrr instructions.
+ */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define rr_lo_hi(a1, a2) a2, a1
+#else
+#define rr_lo_hi(a1, a2) a1, a2
+#endif
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index 1b9844d369cc..2cc14dfad049 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -71,7 +71,7 @@ __do_hyp_init:
 	bne	phase2			@ Yes, second stage init
 
 	@ Set the HTTBR to point to the hypervisor PGD pointer passed
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 
 	@ Set the HTCR and VTCR to the same shareability and cacheability
 	@ settings as the non-secure TTBCR and with T0SZ == 0.
@@ -137,7 +137,7 @@ phase2:
 	mov	pc, r0
 
 target:	@ We're now in the trampoline code, switch page tables
-	mcrr	p15, 4, r2, r3, c2
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c2
 	isb
 
 	@ Invalidate the old TLBs
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 0d68d4073068..24d4e65806a7 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -52,7 +52,7 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
 	dsb	ishst
 	add	r0, r0, #KVM_VTTBR
 	ldrd	r2, r3, [r0]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 	isb
 	mcr     p15, 0, r0, c8, c3, 0	@ TLBIALLIS (rt ignored)
 	dsb	ish
@@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run)
 	ldr	r1, [vcpu, #VCPU_KVM]
 	add	r1, r1, #KVM_VTTBR
 	ldrd	r2, r3, [r1]
-	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	mcrr	p15, 6, rr_lo_hi(r2, r3), c2	@ Write VTTBR
 
 	@ We're all done, just restore the GPRs and go to the guest
 	restore_guest_regs
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 68d99c69639c..98c8c5b9a87f 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -520,7 +520,7 @@ ARM_BE8(rev	r6, r6  )
 	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
 	isb
 
-	mrrc	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mrrc	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	strd	r2, r3, [r5]
@@ -560,12 +560,12 @@ ARM_BE8(rev	r6, r6  )
 
 	ldr	r2, [r4, #KVM_TIMER_CNTVOFF]
 	ldr	r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
-	mcrr	p15, 4, r2, r3, c14	@ CNTVOFF
+	mcrr	p15, 4, rr_lo_hi(r2, r3), c14	@ CNTVOFF
 
 	ldr	r4, =VCPU_TIMER_CNTV_CVAL
 	add	r5, vcpu, r4
 	ldrd	r2, r3, [r5]
-	mcrr	p15, 3, r2, r3, c14	@ CNTV_CVAL
+	mcrr	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
 	isb
 
 	ldr	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]

From b76b02ab6245e8641cc4acdc16b0050132bd9065 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:03 -0700
Subject: [PATCH 206/287] ARM: KVM: __kvm_vcpu_run function return result fix
 in BE case

The __kvm_vcpu_run function returns a 64-bit result in two registers,
which has to be adjusted for BE case.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 6d7311b520864531c81f0e0237e96146d8057d77)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/interrupts.S | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 24d4e65806a7..01dcb0e752d9 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -199,8 +199,13 @@ after_vfp_restore:
 
 	restore_host_regs
 	clrex				@ Clear exclusive monitor
+#ifndef CONFIG_CPU_ENDIAN_BE8
 	mov	r0, r1			@ Return the return code
 	mov	r1, #0			@ Clear upper bits in return value
+#else
+	@ r1 already has return code
+	mov	r0, #0			@ Clear upper bits in return value
+#endif /* CONFIG_CPU_ENDIAN_BE8 */
 	bx	lr			@ return to IOCTL
 
 /********************************************************************

From df5bce90aca5ced2e4ccee2a9ef913b064a47aea Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:04 -0700
Subject: [PATCH 207/287] ARM: KVM: vgic mmio should hold data as LE bytes
 array in BE case

According to recent clarifications of mmio.data array meaning -
the mmio.data array should hold bytes as they would appear in
memory. Vgic is little endian device. And in case of BE image
kernel side that emulates vgic, holds data in BE form. So we
need to byteswap cpu<->le32 vgic registers when we read/write them
from mmio.data[].

Change has no effect in LE case because cpu already runs in le32.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 1c9f04717ca8326e8df759d5dda9cd1b3d968b5b)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 795ab482333d..b0edc8c670f8 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -238,12 +238,12 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 
 static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
 {
-	return *((u32 *)mmio->data) & mask;
+	return le32_to_cpu(*((u32 *)mmio->data)) & mask;
 }
 
 static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
 {
-	*((u32 *)mmio->data) = value & mask;
+	*((u32 *)mmio->data) = cpu_to_le32(value) & mask;
 }
 
 /**

From d6ff09058017a8dfd96fa24502691c83df6566e7 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:05 -0700
Subject: [PATCH 208/287] ARM: KVM: MMIO support BE host running LE code

In case of status register E bit is not set (LE mode) and host runs in
BE mode we need byteswap data, so read/write is emulated correctly.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 27f194fd360a96cc64bebb2d69dd5abd67984b8a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 0fa90c962ac8..69b746955fca 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -185,9 +185,16 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
 		default:
 			return be32_to_cpu(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		default:
+			return le32_to_cpu(data);
+		}
 	}
-
-	return data;		/* Leave LE untouched */
 }
 
 static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
@@ -203,9 +210,16 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 		default:
 			return cpu_to_be32(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		default:
+			return cpu_to_le32(data);
+		}
 	}
-
-	return data;		/* Leave LE untouched */
 }
 
 #endif /* __ARM_KVM_EMULATE_H__ */

From 1f5f8779f88483402671d58f0e307934218e22f6 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:06 -0700
Subject: [PATCH 209/287] ARM: KVM: one_reg coproc set and get BE fixes

Fix code that handles KVM_SET_ONE_REG, KVM_GET_ONE_REG ioctls to work in BE
image. Before this fix get/set_one_reg functions worked correctly only in
LE case - reg_from_user was taking 'void *' kernel address that actually could
be target/source memory of either 4 bytes size or 8 bytes size, and code copied
from/to user memory that could hold either 4 bytes register, 8 byte register
or pair of 4 bytes registers.

In order to work in endian agnostic way reg_from_user to reg_to_user functions
should copy register value only to kernel variable with size that matches
register size. In few place where size mismatch existed fix issue on macro
caller side.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 73891f72c414afff6da6f01e7af2ff5a44a8b823)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c | 88 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index c58a35116f63..37a0fe1bb9bb 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -44,6 +44,31 @@ static u32 cache_levels;
 /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
 #define CSSELR_MAX 12
 
+/*
+ * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some
+ * of cp15 registers can be viewed either as couple of two u32 registers
+ * or one u64 register. Current u64 register encoding is that least
+ * significant u32 word is followed by most significant u32 word.
+ */
+static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
+				       const struct coproc_reg *r,
+				       u64 val)
+{
+	vcpu->arch.cp15[r->reg] = val & 0xffffffff;
+	vcpu->arch.cp15[r->reg + 1] = val >> 32;
+}
+
+static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
+				      const struct coproc_reg *r)
+{
+	u64 val;
+
+	val = vcpu->arch.cp15[r->reg + 1];
+	val = val << 32;
+	val = val | vcpu->arch.cp15[r->reg];
+	return val;
+}
+
 int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	kvm_inject_undefined(vcpu);
@@ -682,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = {
 	{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
 };
 
+/*
+ * Reads a register value from a userspace address to a kernel
+ * variable. Make sure that register size matches sizeof(*__val).
+ */
 static int reg_from_user(void *val, const void __user *uaddr, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
 }
 
+/*
+ * Writes a register value to a userspace address from a kernel variable.
+ * Make sure that register size matches sizeof(*__val).
+ */
 static int reg_to_user(void __user *uaddr, const void *val, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
@@ -702,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 {
 	struct coproc_params params;
 	const struct coproc_reg *r;
+	int ret;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -710,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	return reg_to_user(uaddr, &r->val, id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val = r->val;
+
+		ret = reg_to_user(uaddr, &val, id);
+	} else if (KVM_REG_SIZE(id) == 8) {
+		ret = reg_to_user(uaddr, &r->val, id);
+	}
+	return ret;
 }
 
 static int set_invariant_cp15(u64 id, void __user *uaddr)
@@ -718,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	struct coproc_params params;
 	const struct coproc_reg *r;
 	int err;
-	u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+	u64 val;
 
 	if (!index_to_params(id, &params))
 		return -ENOENT;
@@ -726,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
 	if (!r)
 		return -ENOENT;
 
-	err = reg_from_user(&val, uaddr, id);
+	err = -ENOENT;
+	if (KVM_REG_SIZE(id) == 4) {
+		u32 val32;
+
+		err = reg_from_user(&val32, uaddr, id);
+		if (!err)
+			val = val32;
+	} else if (KVM_REG_SIZE(id) == 8) {
+		err = reg_from_user(&val, uaddr, id);
+	}
 	if (err)
 		return err;
 
@@ -1004,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_get(reg->id, uaddr);
@@ -1015,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return get_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit. */
-	return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		val = vcpu_cp15_reg64_get(vcpu, r);
+		ret = reg_to_user(uaddr, &val, reg->id);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+	}
+
+	return ret;
 }
 
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
 	const struct coproc_reg *r;
 	void __user *uaddr = (void __user *)(long)reg->addr;
+	int ret;
 
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
 		return demux_c15_set(reg->id, uaddr);
@@ -1034,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (!r)
 		return set_invariant_cp15(reg->id, uaddr);
 
-	/* Note: copies two regs if size is 64 bit */
-	return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	ret = -ENOENT;
+	if (KVM_REG_SIZE(reg->id) == 8) {
+		u64 val;
+
+		ret = reg_from_user(&val, uaddr, reg->id);
+		if (!ret)
+			vcpu_cp15_reg64_set(vcpu, r, val);
+	} else if (KVM_REG_SIZE(reg->id) == 4) {
+		ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+	}
+
+	return ret;
 }
 
 static unsigned int num_demux_regs(void)

From 7c1537fb45e64848a4c6f19b66fec1671d608d1b Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:07 -0700
Subject: [PATCH 210/287] ARM: KVM: enable KVM in Kconfig on big-endian systems

Previous patches addresses ARMV7 big-endian virtualiztion,
kvm related issues, so enable ARM_VIRT_EXT for big-endian
now.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f5aa462147a209dab40b02f0f70234784b913bf9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 4be5bb150bdd..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -23,7 +23,7 @@ config KVM
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select KVM_MMIO
 	select KVM_ARM_HOST
-	depends on ARM_VIRT_EXT && ARM_LPAE && !CPU_BIG_ENDIAN
+	depends on ARM_VIRT_EXT && ARM_LPAE
 	---help---
 	  Support hosting virtualized guest machines. You will also
 	  need to select one or more of the processor modules below.

From 5d129b5dba663c42acca6a4711cc8732f692a18a Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:08 -0700
Subject: [PATCH 211/287] ARM64: KVM: MMIO support BE host running LE code

In case of guest CPU running in LE mode and host runs in
BE mode we need byteswap data, so read/write is emulated correctly.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b30070862edbdb252f9d0d3a1e61b8dc4c68e3d2)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index dd8ecfc3f995..fdc3e21abd8d 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -213,6 +213,17 @@ static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
 		default:
 			return be64_to_cpu(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return le16_to_cpu(data & 0xffff);
+		case 4:
+			return le32_to_cpu(data & 0xffffffff);
+		default:
+			return le64_to_cpu(data);
+		}
 	}
 
 	return data;		/* Leave LE untouched */
@@ -233,6 +244,17 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
 		default:
 			return cpu_to_be64(data);
 		}
+	} else {
+		switch (len) {
+		case 1:
+			return data & 0xff;
+		case 2:
+			return cpu_to_le16(data & 0xffff);
+		case 4:
+			return cpu_to_le32(data & 0xffffffff);
+		default:
+			return cpu_to_le64(data);
+		}
 	}
 
 	return data;		/* Leave LE untouched */

From e51ab0ff6436524131e948a9a31258bb859b1ec5 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:09 -0700
Subject: [PATCH 212/287] ARM64: KVM: store kvm_vcpu_fault_info est_el2 as word

esr_el2 field of struct kvm_vcpu_fault_info has u32 type.
It should be stored as word. Current code works in LE case
because existing puts least significant word of x1 into
esr_el2, and it puts most significant work of x1 into next
field, which accidentally is OK because it is updated again
by next instruction. But existing code breaks in BE case.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit ba083d20d8cfa9e999043cd89c4ebc964ccf8927)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 5945f3bdea7a..7874e022d077 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -762,7 +762,7 @@ el1_trap:
 	mrs	x2, far_el2
 
 2:	mrs	x0, tpidr_el2
-	str	x1, [x0, #VCPU_ESR_EL2]
+	str	w1, [x0, #VCPU_ESR_EL2]
 	str	x2, [x0, #VCPU_FAR_EL2]
 	str	x3, [x0, #VCPU_HPFAR_EL2]
 

From df032d7ed86bcaf850b549698c62eb182c2cb4e7 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:10 -0700
Subject: [PATCH 213/287] ARM64: KVM: fix vgic_bitmap_get_reg function for BE
 64bit case

Fix vgic_bitmap_get_reg function to return 'right' word address of
'unsigned long' bitmap value in case of BE 64bit image.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 9662fb4854e1319b4affda47f279c3f210316def)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index b0edc8c670f8..ede8f6466c95 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -98,14 +98,34 @@ static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
 
+/*
+ * struct vgic_bitmap contains unions that provide two views of
+ * the same data. In one case it is an array of registers of
+ * u32's, and in the other case it is a bitmap of unsigned
+ * longs.
+ *
+ * This does not work on 64-bit BE systems, because the bitmap access
+ * will store two consecutive 32-bit words with the higher-addressed
+ * register's bits at the lower index and the lower-addressed register's
+ * bits at the higher index.
+ *
+ * Therefore, swizzle the register index when accessing the 32-bit word
+ * registers to access the right register's value.
+ */
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
+#define REG_OFFSET_SWIZZLE	1
+#else
+#define REG_OFFSET_SWIZZLE	0
+#endif
+
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
 {
 	offset >>= 2;
 	if (!offset)
-		return x->percpu[cpuid].reg;
+		return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
 	else
-		return x->shared.reg + offset - 1;
+		return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
 }
 
 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,

From 21c40b1a24a48d76af181a1b18315a4f0e61b37e Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Thu, 12 Jun 2014 09:30:12 -0700
Subject: [PATCH 214/287] ARM64: KVM: set and get of sys registers in BE case

Since size of all sys registers is always 8 bytes. Current
code is actually endian agnostic. Just clean it up a bit.
Removed comment about little endian. Change type of pointer
from 'void *' to 'u64 *' to enforce stronger type checking.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 26c99af1018c35020cfad1d20f02acb224807655)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 34f25a590bd7..f0ceceffa95a 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -776,17 +776,15 @@ static struct sys_reg_desc invariant_sys_regs[] = {
 	  NULL, get_ctr_el0 },
 };
 
-static int reg_from_user(void *val, const void __user *uaddr, u64 id)
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;
 }
 
-static int reg_to_user(void __user *uaddr, const void *val, u64 id)
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
 {
-	/* This Just Works because we are little endian. */
 	if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
 		return -EFAULT;
 	return 0;

From 5208e0ff0d5097acc955d60d4daa395893a772ce Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Wed, 2 Jul 2014 17:19:30 +0100
Subject: [PATCH 215/287] ARM64: KVM: fix big endian issue in access_vm_reg for
 32bit guest

Fix issue with 32bit guests running on top of BE KVM host.
Indexes of high and low words of 64bit cp15 register are
swapped in case of big endian code, since 64bit cp15 state is
restored or saved with double word write or read instruction.

Define helper macro to access low words of 64bit cp15 register.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit f0a3eaff71b8bd5d5acfda1f0cf3eedf49755622)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h |  6 ++++++
 arch/arm64/kvm/sys_regs.c         | 10 ++++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 4ae9213aa997..503c70661636 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -140,6 +140,12 @@ struct kvm_vcpu_arch {
 #define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
 #define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)])
+#else
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)])
+#endif
+
 struct kvm_vm_stat {
 	u32 remote_tlb_flush;
 };
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f0ceceffa95a..56288f31c12d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -134,13 +134,11 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 	BUG_ON(!p->is_write);
 
 	val = *vcpu_reg(vcpu, p->Rt);
-	if (!p->is_aarch32) {
+	if (!p->is_aarch32 || !p->is_32bit)
 		vcpu_sys_reg(vcpu, r->reg) = val;
-	} else {
-		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
-		if (!p->is_32bit)
-			vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
-	}
+	else
+		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+
 	return true;
 }
 

From 129fb6b2b94c46e279406736a959cc39a23500c6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:21:16 +0100
Subject: [PATCH 216/287] arm64: KVM: rename pm_fake handler to trap_raz_wi

pm_fake doesn't quite describe what the handler does (ignoring writes
and returning 0 for reads).

As we're about to use it (a lot) in a different context, rename it
with a (admitedly cryptic) name that make sense for all users.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 7609c1251f9d8bbcd6a05ba22153e50cf4f88cff)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 83 ++++++++++++++++++++-------------------
 1 file changed, 43 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 56288f31c12d..492ba301e10c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -161,18 +161,9 @@ static bool access_sctlr(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-/*
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters.  Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- */
-static bool pm_fake(struct kvm_vcpu *vcpu,
-		    const struct sys_reg_params *p,
-		    const struct sys_reg_desc *r)
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
 {
 	if (p->is_write)
 		return ignore_write(vcpu, p);
@@ -199,6 +190,17 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters.  Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which doesn't crash the guest kernel at least.
+ *
+ * Same goes for the whole debug infrastructure, which probably breaks
+ * some guest functionnality. This should be fixed.
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
 	/* DC ISW */
@@ -258,10 +260,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* PMINTENSET_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMINTENCLR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 
 	/* MAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
@@ -290,43 +292,43 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* PMCR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCNTENSET_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCNTENCLR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMOVSCLR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMSWINC_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMSELR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCEID0_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCEID1_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMCCNTR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMXEVTYPER_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMXEVCNTR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMUSERENR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
-	  pm_fake },
+	  trap_raz_wi },
 	/* PMOVSSET_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
-	  pm_fake },
+	  trap_raz_wi },
 
 	/* TPIDR_EL0 */
 	{ Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
@@ -372,19 +374,20 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
 
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
-	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+	/* PMU */
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
+	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
 
 	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
 	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },

From efacbc423cf057a9051ca0fd240e68f5596700e7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 7 May 2014 12:13:14 +0100
Subject: [PATCH 217/287] arm64: move DBG_MDSCR_* to asm/debug-monitors.h

In order to be able to use the DBG_MDSCR_* macros from the KVM code,
move the relevant definitions to the obvious include file.

Also move the debug_el enum to a portion of the file that is guarded
by #ifndef __ASSEMBLY__ in order to use that file from assembly code.

Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 51ba248164d0eeb8b4f94d405430c18a56c6ac9a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/debug-monitors.h | 19 ++++++++++++++-----
 arch/arm64/kernel/debug-monitors.c      |  9 ---------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7c951a510b54..aab72ce22348 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -18,6 +18,15 @@
 
 #ifdef __KERNEL__
 
+/* Low-level stepping controls. */
+#define DBG_MDSCR_SS		(1 << 0)
+#define DBG_SPSR_SS		(1 << 21)
+
+/* MDSCR_EL1 enabling bits */
+#define DBG_MDSCR_KDE		(1 << 13)
+#define DBG_MDSCR_MDE		(1 << 15)
+#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+
 #define	DBG_ESR_EVT(x)		(((x) >> 27) & 0x7)
 
 /* AArch64 */
@@ -73,11 +82,6 @@
 
 #define CACHE_FLUSH_IS_SAFE		1
 
-enum debug_el {
-	DBG_ACTIVE_EL0 = 0,
-	DBG_ACTIVE_EL1,
-};
-
 /* AArch32 */
 #define DBG_ESR_EVT_BKPT	0x4
 #define DBG_ESR_EVT_VECC	0x5
@@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook);
 
 u8 debug_monitors_arch(void);
 
+enum debug_el {
+	DBG_ACTIVE_EL0 = 0,
+	DBG_ACTIVE_EL1,
+};
+
 void enable_debug_monitors(enum debug_el el);
 void disable_debug_monitors(enum debug_el el);
 
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 553a120fc838..fea84694fce4 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -31,15 +31,6 @@
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
 
-/* Low-level stepping controls. */
-#define DBG_MDSCR_SS		(1 << 0)
-#define DBG_SPSR_SS		(1 << 21)
-
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE		(1 << 13)
-#define DBG_MDSCR_MDE		(1 << 15)
-#define DBG_MDSCR_MASK		~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {

From ad4686e6b3c82d7cf3f79853cc12f655ce444668 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:24:46 +0100
Subject: [PATCH 218/287] arm64: KVM: add trap handlers for AArch64 debug
 registers

Add handlers for all the AArch64 debug registers that are accessible
from EL0 or EL1. The trapping code keeps track of the state of the
debug registers, allowing for the switch code to implement a lazy
switching strategy.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0c557ed4983b7abe152212b5b1726c2a789b2c61)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h  |  28 ++++--
 arch/arm64/include/asm/kvm_host.h |   3 +
 arch/arm64/kvm/sys_regs.c         | 150 +++++++++++++++++++++++++++++-
 3 files changed, 172 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index a28c35b337ec..660f75c48bbb 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -45,14 +45,25 @@
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
 #define	PAR_EL1		21	/* Physical Address Register */
+#define MDSCR_EL1	22	/* Monitor Debug System Control Register */
+#define DBGBCR0_EL1	23	/* Debug Breakpoint Control Registers (0-15) */
+#define DBGBCR15_EL1	38
+#define DBGBVR0_EL1	39	/* Debug Breakpoint Value Registers (0-15) */
+#define DBGBVR15_EL1	54
+#define DBGWCR0_EL1	55	/* Debug Watchpoint Control Registers (0-15) */
+#define DBGWCR15_EL1	70
+#define DBGWVR0_EL1	71	/* Debug Watchpoint Value Registers (0-15) */
+#define DBGWVR15_EL1	86
+#define MDCCINT_EL1	87	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	22	/* Domain Access Control Register */
-#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	28
+#define	DACR32_EL2	88	/* Domain Access Control Register */
+#define	IFSR32_EL2	89	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	90	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	91	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	92	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	93	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	94
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -89,6 +100,9 @@
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
 
+#define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
+#define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 503c70661636..8e410f761918 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -101,6 +101,9 @@ struct kvm_vcpu_arch {
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
+	/* Debug state */
+	u64 debug_flags;
+
 	/* Pointer to host CPU context */
 	kvm_cpu_context_t *host_cpu_context;
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 492ba301e10c..d53ce430b178 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -30,6 +30,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
+#include <asm/debug-monitors.h>
 #include <trace/events/kvm.h>
 
 #include "sys_regs.h"
@@ -171,6 +172,73 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
 		return read_zero(vcpu, p);
 }
 
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = (1 << 3);
+		return true;
+	}
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+				   const struct sys_reg_params *p,
+				   const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u32 val;
+		asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
+		*vcpu_reg(vcpu, p->Rt) = val;
+		return true;
+	}
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ * 
+ * - If we've touched any debug register, it is likely that we're
+ *   going to touch more of them. It then makes sense to disable the
+ *   traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ *   then mandatory to save/restore the registers, as the guest
+ *   depends on them.
+ * 
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follow:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ *   disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ *   set the dirty bit, disable the traps, save host registers,
+ *   restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ *   registers and clear the dirty bit. This ensure that the host can
+ *   now use the debug registers.
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+	}
+
+	return true;
+}
+
 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
 	u64 amair;
@@ -187,6 +255,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
 }
 
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
+	/* DBGBVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
+	  trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 },		\
+	/* DBGBCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
+	  trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 },		\
+	/* DBGWVRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
+	  trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 },		\
+	/* DBGWCRn_EL1 */						\
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
+	  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+
 /*
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -199,8 +282,12 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
  * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
  * all PM registers, which doesn't crash the guest kernel at least.
  *
- * Same goes for the whole debug infrastructure, which probably breaks
- * some guest functionnality. This should be fixed.
+ * Debug handling: We do trap most, if not all debug related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug, none of the
+ * OSlock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
 	/* DC ISW */
@@ -213,12 +300,71 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
 	  access_dcsw },
 
+	DBG_BCR_BVR_WCR_WVR_EL1(0),
+	DBG_BCR_BVR_WCR_WVR_EL1(1),
+	/* MDCCINT_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+	  trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+	/* MDSCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+	  trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+	DBG_BCR_BVR_WCR_WVR_EL1(2),
+	DBG_BCR_BVR_WCR_WVR_EL1(3),
+	DBG_BCR_BVR_WCR_WVR_EL1(4),
+	DBG_BCR_BVR_WCR_WVR_EL1(5),
+	DBG_BCR_BVR_WCR_WVR_EL1(6),
+	DBG_BCR_BVR_WCR_WVR_EL1(7),
+	DBG_BCR_BVR_WCR_WVR_EL1(8),
+	DBG_BCR_BVR_WCR_WVR_EL1(9),
+	DBG_BCR_BVR_WCR_WVR_EL1(10),
+	DBG_BCR_BVR_WCR_WVR_EL1(11),
+	DBG_BCR_BVR_WCR_WVR_EL1(12),
+	DBG_BCR_BVR_WCR_WVR_EL1(13),
+	DBG_BCR_BVR_WCR_WVR_EL1(14),
+	DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+	/* MDRAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+	  trap_raz_wi },
+	/* OSLAR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
+	  trap_raz_wi },
+	/* OSLSR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
+	  trap_oslsr_el1 },
+	/* OSDLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGPRCR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
+	  trap_raz_wi },
+	/* DBGCLAIMSET_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGCLAIMCLR_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
+	  trap_raz_wi },
+	/* DBGAUTHSTATUS_EL1 */
+	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
+	  trap_dbgauthstatus_el1 },
+
 	/* TEECR32_EL1 */
 	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, TEECR32_EL1, 0 },
 	/* TEEHBR32_EL1 */
 	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, TEEHBR32_EL1, 0 },
+
+	/* MDCCSR_EL1 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
+	  trap_raz_wi },
+	/* DBGDTR[TR]X_EL0 */
+	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
+	  trap_raz_wi },
+
 	/* DBGVCR32_EL2 */
 	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
 	  NULL, reset_val, DBGVCR32_EL2, 0 },

From d62af886035cc34e73e82e06179951f6c4e72c6d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:27:13 +0100
Subject: [PATCH 219/287] arm64: KVM: common infrastructure for handling
 AArch32 CP14/CP15

As we're about to trap a bunch of CP14 registers, let's rework
the CP15 handling so it can be generalized and work with multiple
tables.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 72564016aae45f42e488f926bc803f9a2e1c771c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h    |   2 +-
 arch/arm64/include/asm/kvm_coproc.h |   3 +-
 arch/arm64/include/asm/kvm_host.h   |  13 ++-
 arch/arm64/kvm/handle_exit.c        |   4 +-
 arch/arm64/kvm/sys_regs.c           | 133 +++++++++++++++++++++++-----
 5 files changed, 124 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 660f75c48bbb..69027ded5006 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -95,7 +95,7 @@
 #define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
 #define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-#define NR_CP15_REGS	(NR_SYS_REGS * 2)
+#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
index 9a59301cd014..0b52377a6c11 100644
--- a/arch/arm64/include/asm/kvm_coproc.h
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -39,7 +39,8 @@ void kvm_register_target_sys_reg_table(unsigned int target,
 				       struct kvm_sys_reg_target_table *table);
 
 int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8e410f761918..79812be4f25f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -86,7 +86,7 @@ struct kvm_cpu_context {
 	struct kvm_regs	gp_regs;
 	union {
 		u64 sys_regs[NR_SYS_REGS];
-		u32 cp15[NR_CP15_REGS];
+		u32 copro[NR_COPRO_REGS];
 	};
 };
 
@@ -141,12 +141,17 @@ struct kvm_vcpu_arch {
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 #define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
-#define vcpu_cp15(v,r)		((v)->arch.ctxt.cp15[(r)])
+/*
+ * CP14 and CP15 live in the same array, as they are backed by the
+ * same system registers.
+ */
+#define vcpu_cp14(v,r)		((v)->arch.ctxt.copro[(r)])
+#define vcpu_cp15(v,r)		((v)->arch.ctxt.copro[(r)])
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 1)])
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)])
 #else
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.cp15[((r) + 0)])
+#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)])
 #endif
 
 struct kvm_vm_stat {
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 182415e1a952..e28be510380c 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -73,9 +73,9 @@ static exit_handle_fn arm_exit_handlers[] = {
 	[ESR_EL2_EC_WFI]	= kvm_handle_wfx,
 	[ESR_EL2_EC_CP15_32]	= kvm_handle_cp15_32,
 	[ESR_EL2_EC_CP15_64]	= kvm_handle_cp15_64,
-	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_MR]	= kvm_handle_cp14_32,
 	[ESR_EL2_EC_CP14_LS]	= kvm_handle_cp14_load_store,
-	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_access,
+	[ESR_EL2_EC_CP14_64]	= kvm_handle_cp14_64,
 	[ESR_EL2_EC_HVC32]	= handle_hvc,
 	[ESR_EL2_EC_SMC32]	= handle_smc,
 	[ESR_EL2_EC_HVC64]	= handle_hvc,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d53ce430b178..266afd972ad3 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -494,6 +494,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
+/* Trapped cp14 registers */
+static const struct sys_reg_desc cp14_regs[] = {
+};
+
 /*
  * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
  * depending on the way they are accessed (as a 32bit or a 64bit
@@ -601,26 +605,29 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	return 1;
 }
 
-int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+/*
+ * emulate_cp --  tries to match a sys_reg access in a handling table, and
+ *                call the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_params *params,
+		      const struct sys_reg_desc *table,
+		      size_t num)
 {
-	kvm_inject_undefined(vcpu);
-	return 1;
-}
+	const struct sys_reg_desc *r;
 
-static void emulate_cp15(struct kvm_vcpu *vcpu,
-			 const struct sys_reg_params *params)
-{
-	size_t num;
-	const struct sys_reg_desc *table, *r;
+	if (!table)
+		return -1;	/* Not handled */
 
-	table = get_target_table(vcpu->arch.target, false, &num);
-
-	/* Search target-specific then generic table. */
 	r = find_reg(params, table, num);
-	if (!r)
-		r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs));
 
-	if (likely(r)) {
+	if (r) {
 		/*
 		 * Not having an accessor means that we have
 		 * configured a trap that we don't know how to
@@ -632,22 +639,51 @@ static void emulate_cp15(struct kvm_vcpu *vcpu,
 		if (likely(r->access(vcpu, params, r))) {
 			/* Skip instruction, since it was emulated */
 			kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
-			return;
 		}
-		/* If access function fails, it should complain. */
+
+		/* Handled */
+		return 0;
 	}
 
-	kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu));
+	/* Not handled */
+	return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+				struct sys_reg_params *params)
+{
+	u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+	int cp;
+
+	switch(hsr_ec) {
+	case ESR_EL2_EC_CP15_32:
+	case ESR_EL2_EC_CP15_64:
+		cp = 15;
+		break;
+	case ESR_EL2_EC_CP14_MR:
+	case ESR_EL2_EC_CP14_64:
+		cp = 14;
+		break;
+	default:
+		WARN_ON((cp = -1));
+	}
+
+	kvm_err("Unsupported guest CP%d access at: %08lx\n",
+		cp, *vcpu_pc(vcpu));
 	print_sys_reg_instr(params);
 	kvm_inject_undefined(vcpu);
 }
 
 /**
- * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP15 access
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
-int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
@@ -676,8 +712,14 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		*vcpu_reg(vcpu, params.Rt) = val;
 	}
 
-	emulate_cp15(vcpu, &params);
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		goto out;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		goto out;
 
+	unhandled_cp_access(vcpu, &params);
+
+out:
 	/* Do the opposite hack for the read side */
 	if (!params.is_write) {
 		u64 val = *vcpu_reg(vcpu, params.Rt);
@@ -693,7 +735,11 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
  * @vcpu: The VCPU pointer
  * @run:  The kvm_run struct
  */
-int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+			    const struct sys_reg_desc *global,
+			    size_t nr_global,
+			    const struct sys_reg_desc *target_specific,
+			    size_t nr_specific)
 {
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
@@ -708,10 +754,51 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op1 = (hsr >> 14) & 0x7;
 	params.Op2 = (hsr >> 17) & 0x7;
 
-	emulate_cp15(vcpu, &params);
+	if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+		return 1;
+	if (!emulate_cp(vcpu, &params, global, nr_global))
+		return 1;
+
+	unhandled_cp_access(vcpu, &params);
 	return 1;
 }
 
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_64(vcpu,
+				cp15_regs, ARRAY_SIZE(cp15_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	const struct sys_reg_desc *target_specific;
+	size_t num;
+
+	target_specific = get_target_table(vcpu->arch.target, false, &num);
+	return kvm_handle_cp_32(vcpu,
+				cp15_regs, ARRAY_SIZE(cp15_regs),
+				target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_64(vcpu,
+				cp14_regs, ARRAY_SIZE(cp14_regs),
+				NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	return kvm_handle_cp_32(vcpu,
+				cp14_regs, ARRAY_SIZE(cp14_regs),
+				NULL, 0);
+}
+
 static int emulate_sys_reg(struct kvm_vcpu *vcpu,
 			   const struct sys_reg_params *params)
 {

From 693292e6a93a775a91421b16f87f632e5d7f077b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 14:11:48 +0100
Subject: [PATCH 220/287] arm64: KVM: use separate tables for AArch32 32 and
 64bit traps

An interesting "feature" of the CP14 encoding is that there is
an overlap between 32 and 64bit registers, meaning they cannot
live in the same table as we did for CP15.

Create separate tables for 64bit CP14 and CP15 registers, and
let the top level handler use the right one.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit a9866ba0cddfc497335fa02a175c4578b96722ff)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 266afd972ad3..499a351fd1b9 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -498,13 +498,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 static const struct sys_reg_desc cp14_regs[] = {
 };
 
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+};
+
 /*
  * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
  * depending on the way they are accessed (as a 32bit or a 64bit
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
-	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -545,6 +548,10 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
+};
+
+static const struct sys_reg_desc cp15_64_regs[] = {
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 };
 
@@ -770,7 +777,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	target_specific = get_target_table(vcpu->arch.target, false, &num);
 	return kvm_handle_cp_64(vcpu,
-				cp15_regs, ARRAY_SIZE(cp15_regs),
+				cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
 				target_specific, num);
 }
 
@@ -788,7 +795,7 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	return kvm_handle_cp_64(vcpu,
-				cp14_regs, ARRAY_SIZE(cp14_regs),
+				cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
 				NULL, 0);
 }
 

From 2c4bc5c6442250ebe7a1b08ed297fc48aef99547 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 7 May 2014 13:43:39 +0100
Subject: [PATCH 221/287] arm64: KVM: check ordering of all system register
 tables

We now have multiple tables for the various system registers
we trap. Make sure we check the order of all of them, as it is
critical that we get the order right (been there, done that...).

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit e6a9551760623d1703487e8a16bb9c3ea8a7e7a8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/sys_regs.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 499a351fd1b9..8ab47c7326ed 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1308,14 +1308,32 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 	return write_demux_regids(uindices);
 }
 
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+	unsigned int i;
+
+	for (i = 1; i < n; i++) {
+		if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+			kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 void kvm_sys_reg_table_init(void)
 {
 	unsigned int i;
 	struct sys_reg_desc clidr;
 
 	/* Make sure tables are unique and in order. */
-	for (i = 1; i < ARRAY_SIZE(sys_reg_descs); i++)
-		BUG_ON(cmp_sys_reg(&sys_reg_descs[i-1], &sys_reg_descs[i]) >= 0);
+	BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+	BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+	BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+	BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+	BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+	BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
 
 	/* We abuse the reset function to overwrite the table itself. */
 	for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)

From c85f50a20b43754698af82b27279ca87cb04bd3a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:31:37 +0100
Subject: [PATCH 222/287] arm64: KVM: add trap handlers for AArch32 debug
 registers

Add handlers for all the AArch32 debug registers that are accessible
from EL0 or EL1. The code follow the same strategy as the AArch64
counterpart with regards to tracking the dirty state of the debug
registers.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit bdfb4b389c8d8f07e2d5b8e1291e01c789ba4aad)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_asm.h |   9 ++
 arch/arm64/kvm/sys_regs.c        | 144 ++++++++++++++++++++++++++++++-
 2 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 69027ded5006..483842180f8f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -95,6 +95,15 @@
 #define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
 #define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+
+#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
+#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
+
 #define NR_COPRO_REGS	(NR_SYS_REGS * 2)
 
 #define ARM_EXCEPTION_IRQ	  0
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 8ab47c7326ed..a4fd5267c65b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -494,12 +494,153 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
-/* Trapped cp14 registers */
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+			const struct sys_reg_params *p,
+			const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		return ignore_write(vcpu, p);
+	} else {
+		u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+		u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+		u32 el3 = !!((pfr >> 12) & 0xf);
+
+		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
+					  (((dfr >> 12) & 0xf) << 24) |
+					  (((dfr >> 28) & 0xf) << 20) |
+					  (6 << 16) | (el3 << 14) | (el3 << 12));
+		return true;
+	}
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (p->is_write) {
+		vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+	} else {
+		*vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+	}
+
+	return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n)					\
+	/* DBGBVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32,	\
+	  NULL, (cp14_DBGBVR0 + (n) * 2) },			\
+	/* DBGBCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32,	\
+	  NULL, (cp14_DBGBCR0 + (n) * 2) },			\
+	/* DBGWVRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32,	\
+	  NULL, (cp14_DBGWVR0 + (n) * 2) },			\
+	/* DBGWCRn */						\
+	{ Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32,	\
+	  NULL, (cp14_DBGWCR0 + (n) * 2) }
+
+#define DBGBXVR(n)						\
+	{ Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32,	\
+	  NULL, cp14_DBGBXVR0 + n * 2 }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug, on the principle that they don't really make sense to a
+ * guest. Revisit this one day, whould this principle change.
+ */
 static const struct sys_reg_desc cp14_regs[] = {
+	/* DBGIDR */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+	/* DBGDTRRXext */
+	{ Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+	DBG_BCR_BVR_WCR_WVR(0),
+	/* DBGDSCRint */
+	{ Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(1),
+	/* DBGDCCINT */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
+	/* DBGDSCRext */
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(2),
+	/* DBGDTR[RT]Xint */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+	/* DBGDTR[RT]Xext */
+	{ Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(3),
+	DBG_BCR_BVR_WCR_WVR(4),
+	DBG_BCR_BVR_WCR_WVR(5),
+	/* DBGWFAR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+	/* DBGOSECCR */
+	{ Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+	DBG_BCR_BVR_WCR_WVR(6),
+	/* DBGVCR */
+	{ Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
+	DBG_BCR_BVR_WCR_WVR(7),
+	DBG_BCR_BVR_WCR_WVR(8),
+	DBG_BCR_BVR_WCR_WVR(9),
+	DBG_BCR_BVR_WCR_WVR(10),
+	DBG_BCR_BVR_WCR_WVR(11),
+	DBG_BCR_BVR_WCR_WVR(12),
+	DBG_BCR_BVR_WCR_WVR(13),
+	DBG_BCR_BVR_WCR_WVR(14),
+	DBG_BCR_BVR_WCR_WVR(15),
+
+	/* DBGDRAR (32bit) */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+	DBGBXVR(0),
+	/* DBGOSLAR */
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+	DBGBXVR(1),
+	/* DBGOSLSR */
+	{ Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+	DBGBXVR(2),
+	DBGBXVR(3),
+	/* DBGOSDLR */
+	{ Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+	DBGBXVR(4),
+	/* DBGPRCR */
+	{ Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+	DBGBXVR(5),
+	DBGBXVR(6),
+	DBGBXVR(7),
+	DBGBXVR(8),
+	DBGBXVR(9),
+	DBGBXVR(10),
+	DBGBXVR(11),
+	DBGBXVR(12),
+	DBGBXVR(13),
+	DBGBXVR(14),
+	DBGBXVR(15),
+
+	/* DBGDSAR (32bit) */
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+	/* DBGDEVID2 */
+	{ Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+	/* DBGDEVID1 */
+	{ Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+	/* DBGDEVID */
+	{ Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+	/* DBGCLAIMSET */
+	{ Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+	/* DBGCLAIMCLR */
+	{ Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+	/* DBGAUTHSTATUS */
+	{ Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
 };
 
 /* Trapped cp14 64bit registers */
 static const struct sys_reg_desc cp14_64_regs[] = {
+	/* DBGDRAR (64bit) */
+	{ Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+	/* DBGDSAR (64bit) */
+	{ Op1( 0), CRm( 2), .access = trap_raz_wi },
 };
 
 /*
@@ -547,7 +688,6 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
-
 };
 
 static const struct sys_reg_desc cp15_64_regs[] = {

From 44af263503e0e31b1f88ce86df73e467f18e3198 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 7 May 2014 13:44:49 +0100
Subject: [PATCH 223/287] arm64: KVM: implement lazy world switch for debug
 registers

Implement switching of the debug registers. While the number
of registers is massive, CPUs usually don't implement them all
(A57 has 6 breakpoints and 4 watchpoints, which gives us a total
of 22 registers "only").

Also, we only save/restore them when MDSCR_EL1 has debug enabled,
or when we've flagged the debug registers as dirty. It means that
most of the time, we only save/restore MDSCR_EL1.

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit b0e626b380872b663918230fafdac128c34fea56)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kernel/asm-offsets.c |   1 +
 arch/arm64/kvm/hyp.S            | 463 +++++++++++++++++++++++++++++++-
 2 files changed, 458 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 65ebb2ccde5f..825d76c21d84 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -118,6 +118,7 @@ int main(void)
   DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
   DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+  DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 7874e022d077..100494b5c7d4 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -20,6 +20,7 @@
 #include <asm/assembler.h>
 #include <asm/memory.h>
 #include <asm/asm-offsets.h>
+#include <asm/debug-monitors.h>
 #include <asm/fpsimdmacros.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
@@ -211,6 +212,7 @@
 	mrs	x22, 	amair_el1
 	mrs	x23, 	cntkctl_el1
 	mrs	x24,	par_el1
+	mrs	x25,	mdscr_el1
 
 	stp	x4, x5, [x3]
 	stp	x6, x7, [x3, #16]
@@ -222,7 +224,202 @@
 	stp	x18, x19, [x3, #112]
 	stp	x20, x21, [x3, #128]
 	stp	x22, x23, [x3, #144]
-	str	x24, [x3, #160]
+	stp	x24, x25, [x3, #160]
+.endm
+
+.macro save_debug
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbcr15_el1
+	mrs	x19, dbgbcr14_el1
+	mrs	x18, dbgbcr13_el1
+	mrs	x17, dbgbcr12_el1
+	mrs	x16, dbgbcr11_el1
+	mrs	x15, dbgbcr10_el1
+	mrs	x14, dbgbcr9_el1
+	mrs	x13, dbgbcr8_el1
+	mrs	x12, dbgbcr7_el1
+	mrs	x11, dbgbcr6_el1
+	mrs	x10, dbgbcr5_el1
+	mrs	x9, dbgbcr4_el1
+	mrs	x8, dbgbcr3_el1
+	mrs	x7, dbgbcr2_el1
+	mrs	x6, dbgbcr1_el1
+	mrs	x5, dbgbcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgbvr15_el1
+	mrs	x19, dbgbvr14_el1
+	mrs	x18, dbgbvr13_el1
+	mrs	x17, dbgbvr12_el1
+	mrs	x16, dbgbvr11_el1
+	mrs	x15, dbgbvr10_el1
+	mrs	x14, dbgbvr9_el1
+	mrs	x13, dbgbvr8_el1
+	mrs	x12, dbgbvr7_el1
+	mrs	x11, dbgbvr6_el1
+	mrs	x10, dbgbvr5_el1
+	mrs	x9, dbgbvr4_el1
+	mrs	x8, dbgbvr3_el1
+	mrs	x7, dbgbvr2_el1
+	mrs	x6, dbgbvr1_el1
+	mrs	x5, dbgbvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwcr15_el1
+	mrs	x19, dbgwcr14_el1
+	mrs	x18, dbgwcr13_el1
+	mrs	x17, dbgwcr12_el1
+	mrs	x16, dbgwcr11_el1
+	mrs	x15, dbgwcr10_el1
+	mrs	x14, dbgwcr9_el1
+	mrs	x13, dbgwcr8_el1
+	mrs	x12, dbgwcr7_el1
+	mrs	x11, dbgwcr6_el1
+	mrs	x10, dbgwcr5_el1
+	mrs	x9, dbgwcr4_el1
+	mrs	x8, dbgwcr3_el1
+	mrs	x7, dbgwcr2_el1
+	mrs	x6, dbgwcr1_el1
+	mrs	x5, dbgwcr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	mrs	x20, dbgwvr15_el1
+	mrs	x19, dbgwvr14_el1
+	mrs	x18, dbgwvr13_el1
+	mrs	x17, dbgwvr12_el1
+	mrs	x16, dbgwvr11_el1
+	mrs	x15, dbgwvr10_el1
+	mrs	x14, dbgwvr9_el1
+	mrs	x13, dbgwvr8_el1
+	mrs	x12, dbgwvr7_el1
+	mrs	x11, dbgwvr6_el1
+	mrs	x10, dbgwvr5_el1
+	mrs	x9, dbgwvr4_el1
+	mrs	x8, dbgwvr3_el1
+	mrs	x7, dbgwvr2_el1
+	mrs	x6, dbgwvr1_el1
+	mrs	x5, dbgwvr0_el1
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+
+1:
+	str	x20, [x3, #(15 * 8)]
+	str	x19, [x3, #(14 * 8)]
+	str	x18, [x3, #(13 * 8)]
+	str	x17, [x3, #(12 * 8)]
+	str	x16, [x3, #(11 * 8)]
+	str	x15, [x3, #(10 * 8)]
+	str	x14, [x3, #(9 * 8)]
+	str	x13, [x3, #(8 * 8)]
+	str	x12, [x3, #(7 * 8)]
+	str	x11, [x3, #(6 * 8)]
+	str	x10, [x3, #(5 * 8)]
+	str	x9, [x3, #(4 * 8)]
+	str	x8, [x3, #(3 * 8)]
+	str	x7, [x3, #(2 * 8)]
+	str	x6, [x3, #(1 * 8)]
+	str	x5, [x3, #(0 * 8)]
+
+	mrs	x21, mdccint_el1
+	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
 .endm
 
 .macro restore_sysregs
@@ -241,7 +438,7 @@
 	ldp	x18, x19, [x3, #112]
 	ldp	x20, x21, [x3, #128]
 	ldp	x22, x23, [x3, #144]
-	ldr	x24, [x3, #160]
+	ldp	x24, x25, [x3, #160]
 
 	msr	vmpidr_el2,	x4
 	msr	csselr_el1,	x5
@@ -264,6 +461,198 @@
 	msr	amair_el1,	x22
 	msr	cntkctl_el1,	x23
 	msr	par_el1,	x24
+	msr	mdscr_el1,	x25
+.endm
+
+.macro restore_debug
+	// x2: base address for cpu context
+	// x3: tmp register
+
+	mrs	x26, id_aa64dfr0_el1
+	ubfx	x24, x26, #12, #4	// Extract BRPs
+	ubfx	x25, x26, #20, #4	// Extract WRPs
+	mov	w26, #15
+	sub	w24, w26, w24		// How many BPs to skip
+	sub	w25, w26, w25		// How many WPs to skip
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	msr	dbgbcr15_el1, x20
+	msr	dbgbcr14_el1, x19
+	msr	dbgbcr13_el1, x18
+	msr	dbgbcr12_el1, x17
+	msr	dbgbcr11_el1, x16
+	msr	dbgbcr10_el1, x15
+	msr	dbgbcr9_el1, x14
+	msr	dbgbcr8_el1, x13
+	msr	dbgbcr7_el1, x12
+	msr	dbgbcr6_el1, x11
+	msr	dbgbcr5_el1, x10
+	msr	dbgbcr4_el1, x9
+	msr	dbgbcr3_el1, x8
+	msr	dbgbcr2_el1, x7
+	msr	dbgbcr1_el1, x6
+	msr	dbgbcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x24, lsl #2
+	br	x26
+1:
+	msr	dbgbvr15_el1, x20
+	msr	dbgbvr14_el1, x19
+	msr	dbgbvr13_el1, x18
+	msr	dbgbvr12_el1, x17
+	msr	dbgbvr11_el1, x16
+	msr	dbgbvr10_el1, x15
+	msr	dbgbvr9_el1, x14
+	msr	dbgbvr8_el1, x13
+	msr	dbgbvr7_el1, x12
+	msr	dbgbvr6_el1, x11
+	msr	dbgbvr5_el1, x10
+	msr	dbgbvr4_el1, x9
+	msr	dbgbvr3_el1, x8
+	msr	dbgbvr2_el1, x7
+	msr	dbgbvr1_el1, x6
+	msr	dbgbvr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwcr15_el1, x20
+	msr	dbgwcr14_el1, x19
+	msr	dbgwcr13_el1, x18
+	msr	dbgwcr12_el1, x17
+	msr	dbgwcr11_el1, x16
+	msr	dbgwcr10_el1, x15
+	msr	dbgwcr9_el1, x14
+	msr	dbgwcr8_el1, x13
+	msr	dbgwcr7_el1, x12
+	msr	dbgwcr6_el1, x11
+	msr	dbgwcr5_el1, x10
+	msr	dbgwcr4_el1, x9
+	msr	dbgwcr3_el1, x8
+	msr	dbgwcr2_el1, x7
+	msr	dbgwcr1_el1, x6
+	msr	dbgwcr0_el1, x5
+
+	add	x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	ldr	x20, [x3, #(15 * 8)]
+	ldr	x19, [x3, #(14 * 8)]
+	ldr	x18, [x3, #(13 * 8)]
+	ldr	x17, [x3, #(12 * 8)]
+	ldr	x16, [x3, #(11 * 8)]
+	ldr	x15, [x3, #(10 * 8)]
+	ldr	x14, [x3, #(9 * 8)]
+	ldr	x13, [x3, #(8 * 8)]
+	ldr	x12, [x3, #(7 * 8)]
+	ldr	x11, [x3, #(6 * 8)]
+	ldr	x10, [x3, #(5 * 8)]
+	ldr	x9, [x3, #(4 * 8)]
+	ldr	x8, [x3, #(3 * 8)]
+	ldr	x7, [x3, #(2 * 8)]
+	ldr	x6, [x3, #(1 * 8)]
+	ldr	x5, [x3, #(0 * 8)]
+
+	adr	x26, 1f
+	add	x26, x26, x25, lsl #2
+	br	x26
+1:
+	msr	dbgwvr15_el1, x20
+	msr	dbgwvr14_el1, x19
+	msr	dbgwvr13_el1, x18
+	msr	dbgwvr12_el1, x17
+	msr	dbgwvr11_el1, x16
+	msr	dbgwvr10_el1, x15
+	msr	dbgwvr9_el1, x14
+	msr	dbgwvr8_el1, x13
+	msr	dbgwvr7_el1, x12
+	msr	dbgwvr6_el1, x11
+	msr	dbgwvr5_el1, x10
+	msr	dbgwvr4_el1, x9
+	msr	dbgwvr3_el1, x8
+	msr	dbgwvr2_el1, x7
+	msr	dbgwvr1_el1, x6
+	msr	dbgwvr0_el1, x5
+
+	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+	msr	mdccint_el1, x21
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -278,6 +667,35 @@
 	tbz	\tmp, #12, \target
 .endm
 
+.macro skip_debug_state tmp, target
+	ldr	\tmp, [x0, #VCPU_DEBUG_FLAGS]
+	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
+.endm
+
+.macro compute_debug_state target
+	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
+	// is set, we do a full save/restore cycle and disable trapping.
+	add	x25, x0, #VCPU_CONTEXT
+
+	// Check the state of MDSCR_EL1
+	ldr	x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
+	and	x26, x25, #DBG_MDSCR_KDE
+	and	x25, x25, #DBG_MDSCR_MDE
+	adds	xzr, x25, x26
+	b.eq	9998f		// Nothing to see there
+
+	// If any interesting bits was set, we must set the flag
+	mov	x26, #KVM_ARM64_DEBUG_DIRTY
+	str	x26, [x0, #VCPU_DEBUG_FLAGS]
+	b	9999f		// Don't skip restore
+
+9998:
+	// Otherwise load the flags from memory in case we recently
+	// trapped
+	skip_debug_state x25, \target
+9999:
+.endm
+
 .macro save_guest_32bit_state
 	skip_32bit_state x3, 1f
 
@@ -293,10 +711,13 @@
 	mrs	x4, dacr32_el2
 	mrs	x5, ifsr32_el2
 	mrs	x6, fpexc32_el2
-	mrs	x7, dbgvcr32_el2
 	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
+	str	x6, [x3, #16]
 
+	skip_debug_state x8, 2f
+	mrs	x7, dbgvcr32_el2
+	str	x7, [x3, #24]
+2:
 	skip_tee_state x8, 1f
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -319,12 +740,15 @@
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
 	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
+	ldr	x6, [x3, #16]
 	msr	dacr32_el2, x4
 	msr	ifsr32_el2, x5
 	msr	fpexc32_el2, x6
-	msr	dbgvcr32_el2, x7
 
+	skip_debug_state x8, 2f
+	ldr	x7, [x3, #24]
+	msr	dbgvcr32_el2, x7
+2:
 	skip_tee_state x8, 1f
 
 	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
@@ -463,6 +887,14 @@ __restore_sysregs:
 	restore_sysregs
 	ret
 
+__save_debug:
+	save_debug
+	ret
+
+__restore_debug:
+	restore_debug
+	ret
+
 __save_fpsimd:
 	save_fpsimd
 	ret
@@ -494,6 +926,9 @@ ENTRY(__kvm_vcpu_run)
 	bl __save_fpsimd
 	bl __save_sysregs
 
+	compute_debug_state 1f
+	bl	__save_debug
+1:
 	activate_traps
 	activate_vm
 
@@ -505,6 +940,10 @@ ENTRY(__kvm_vcpu_run)
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	bl	__restore_debug
+1:
 	restore_guest_32bit_state
 	restore_guest_regs
 
@@ -521,6 +960,10 @@ __kvm_vcpu_return:
 	save_guest_regs
 	bl __save_fpsimd
 	bl __save_sysregs
+
+	skip_debug_state x3, 1f
+	bl	__save_debug
+1:
 	save_guest_32bit_state
 
 	save_timer_state
@@ -535,6 +978,14 @@ __kvm_vcpu_return:
 
 	bl __restore_sysregs
 	bl __restore_fpsimd
+
+	skip_debug_state x3, 1f
+	// Clear the dirty flag for the next run, as all the state has
+	// already been saved. Note that we nuke the whole 64bit word.
+	// If we ever add more flags, we'll have to be more careful...
+	str	xzr, [x0, #VCPU_DEBUG_FLAGS]
+	bl	__restore_debug
+1:
 	restore_host_regs
 
 	mov	x0, x1

From 99999d6ea6ff7b0b02a4ac6abef0d49ec1fd9fcb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 24 Apr 2014 10:32:03 +0100
Subject: [PATCH 224/287] arm64: KVM: enable trapping of all debug registers

Enable trapping of the debug registers, preventing the guests to
mess with the host state (and allowing guests to use the debug
infrastructure as well).

Reviewed-by: Anup Patel <anup.patel@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit d329de09333aeee127aaf22eb7cee9c2dc4cf475)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/hyp.S | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 100494b5c7d4..b72aa9f9215c 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -770,6 +770,14 @@
 	mrs	x2, mdcr_el2
 	and	x2, x2, #MDCR_EL2_HPMN_MASK
 	orr	x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+	orr	x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
+
+	// Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
+	// if not dirty.
+	ldr	x3, [x0, #VCPU_DEBUG_FLAGS]
+	tbnz	x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
+	orr	x2, x2,  #MDCR_EL2_TDA
+1:
 	msr	mdcr_el2, x2
 .endm
 

From 38ca7b90842e554439e2291803cdb0eb850b4807 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Tue, 24 Jun 2014 19:43:15 +0100
Subject: [PATCH 225/287] ARM: make it easier to check the CPU part number
 correctly

Ensure that platform maintainers check the CPU part number in the right
manner: the CPU part number is meaningless without also checking the
CPU implement(e|o)r (choose your preferred spelling!)  Provide an
interface which returns both the implementer and part number together,
and update the definitions to include the implementer.

Mark the old function as being deprecated... indeed, using the old
function with the definitions will now always evaluate as false, so
people must update their un-merged code to the new function.  While
this could be avoided by adding new definitions, we'd also have to
create new names for them which would be awkward.

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
(cherry picked from commit af040ffc9ba1e079ee4c0748aff64fa3d4716fa5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/cputype.h   | 35 +++++++++++------
 arch/arm/include/asm/smp_scu.h   |  2 +-
 arch/arm/kernel/perf_event_cpu.c | 64 ++++++++++++++------------------
 arch/arm/kvm/guest.c             |  8 +---
 4 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index dba62cb1ad08..3392fe2d3174 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -43,15 +43,18 @@
 #define ARM_CPU_IMP_ARM			0x41
 #define ARM_CPU_IMP_INTEL		0x69
 
-#define ARM_CPU_PART_ARM1136		0xB360
-#define ARM_CPU_PART_ARM1156		0xB560
-#define ARM_CPU_PART_ARM1176		0xB760
-#define ARM_CPU_PART_ARM11MPCORE	0xB020
-#define ARM_CPU_PART_CORTEX_A8		0xC080
-#define ARM_CPU_PART_CORTEX_A9		0xC090
-#define ARM_CPU_PART_CORTEX_A5		0xC050
-#define ARM_CPU_PART_CORTEX_A15		0xC0F0
-#define ARM_CPU_PART_CORTEX_A7		0xC070
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136		0x4100b360
+#define ARM_CPU_PART_ARM1156		0x4100b560
+#define ARM_CPU_PART_ARM1176		0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE	0x4100b020
+#define ARM_CPU_PART_CORTEX_A8		0x4100c080
+#define ARM_CPU_PART_CORTEX_A9		0x4100c090
+#define ARM_CPU_PART_CORTEX_A5		0x4100c050
+#define ARM_CPU_PART_CORTEX_A7		0x4100c070
+#define ARM_CPU_PART_CORTEX_A12		0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17		0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15		0x4100c0f0
 
 #define ARM_CPU_XSCALE_ARCH_MASK	0xe000
 #define ARM_CPU_XSCALE_ARCH_V1		0x2000
@@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
 	return (read_cpuid_id() & 0xFF000000) >> 24;
 }
 
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without referring to the CPU
+ * implementer: implementers are free to define their own part numbers
+ * which are permitted to clash with other implementer part numbers.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+	return read_cpuid_id() & 0xff00fff0;
+}
+
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
 {
 	return read_cpuid_id() & 0xFFF0;
 }
 
 static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
 {
-	return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+	return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
 }
 
 static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 18d169373612..1a292d8be988 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -11,7 +11,7 @@
 
 static inline bool scu_a9_has_base(void)
 {
-	return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+	return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
 }
 
 static inline unsigned long scu_a9_get_base(void)
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e3dbc0..0e9609657c79 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -201,49 +201,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
 static int probe_current_pmu(struct arm_pmu *pmu)
 {
 	int cpu = get_cpu();
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
 	int ret = -ENODEV;
 
 	pr_info("probing PMU on CPU %d\n", cpu);
 
+	switch (read_cpuid_part()) {
 	/* ARM Ltd CPUs. */
-	if (implementor == ARM_CPU_IMP_ARM) {
-		switch (part_number) {
-		case ARM_CPU_PART_ARM1136:
-		case ARM_CPU_PART_ARM1156:
-		case ARM_CPU_PART_ARM1176:
-			ret = armv6pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_ARM11MPCORE:
-			ret = armv6mpcore_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A8:
-			ret = armv7_a8_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A9:
-			ret = armv7_a9_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A5:
-			ret = armv7_a5_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A15:
-			ret = armv7_a15_pmu_init(pmu);
-			break;
-		case ARM_CPU_PART_CORTEX_A7:
-			ret = armv7_a7_pmu_init(pmu);
-			break;
-		}
-	/* Intel CPUs [xscale]. */
-	} else if (implementor == ARM_CPU_IMP_INTEL) {
-		switch (xscale_cpu_arch_version()) {
-		case ARM_CPU_XSCALE_ARCH_V1:
-			ret = xscale1pmu_init(pmu);
-			break;
-		case ARM_CPU_XSCALE_ARCH_V2:
-			ret = xscale2pmu_init(pmu);
-			break;
+	case ARM_CPU_PART_ARM1136:
+	case ARM_CPU_PART_ARM1156:
+	case ARM_CPU_PART_ARM1176:
+		ret = armv6pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_ARM11MPCORE:
+		ret = armv6mpcore_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A8:
+		ret = armv7_a8_pmu_init(pmu);
+		break;
+	case ARM_CPU_PART_CORTEX_A9:
+		ret = armv7_a9_pmu_init(pmu);
+		break;
+
+	default:
+		if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+			switch (xscale_cpu_arch_version()) {
+			case ARM_CPU_XSCALE_ARCH_V1:
+				ret = xscale1pmu_init(pmu);
+				break;
+			case ARM_CPU_XSCALE_ARCH_V2:
+				ret = xscale2pmu_init(pmu);
+				break;
+			}
 		}
+		break;
 	}
 
 	put_cpu();
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 986e625b5dbd..813e49258690 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -264,13 +264,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int __attribute_const__ kvm_target_cpu(void)
 {
-	unsigned long implementor = read_cpuid_implementor();
-	unsigned long part_number = read_cpuid_part_number();
-
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
-
-	switch (part_number) {
+	switch (read_cpuid_part()) {
 	case ARM_CPU_PART_CORTEX_A7:
 		return KVM_ARM_TARGET_CORTEX_A7;
 	case ARM_CPU_PART_CORTEX_A15:

From 2d05a876aea8c40db12b9b721be21773aad6057b Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Fri, 25 Jul 2014 06:27:03 -0700
Subject: [PATCH 226/287] kvm: Resolve missing-field-initializers warnings

Resolve missing-field-initializers warnings seen in W=2 kernel
builds by having macros generate more elaborated initializers.
That is enough to silence the warnings.

Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 25f97ff451a4aab534afc1290af97d23ea0b4fb3)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/irq_comm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index ced4a542a031..a228ee82bad2 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -323,13 +323,13 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 
 #define IOAPIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
-	  .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
 #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
 
 #ifdef CONFIG_X86
 #  define PIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
-	  .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+	  .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
 #  define ROUTING_ENTRY2(irq) \
 	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
 #else

From 7b6de249336851f2fb8515b2f428bc9e56a8dbb1 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 18:27:35 +0200
Subject: [PATCH 227/287] KVM: Rename and add argument to check_extension

In preparation to make the check_extension function available to VM scope
we add a struct kvm * argument to the function header and rename the function
accordingly. It will still be called from the /dev/kvm fd, but with a NULL
argument for struct kvm *.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c         | 2 +-
 arch/ia64/kvm/kvm-ia64.c   | 2 +-
 arch/powerpc/kvm/powerpc.c | 2 +-
 arch/s390/kvm/kvm-s390.c   | 2 +-
 arch/x86/kvm/x86.c         | 2 +-
 include/linux/kvm_host.h   | 2 +-
 virt/kvm/kvm_main.c        | 6 +++---
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e2c2bfd4da95..0e3d3dc8eea2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -174,7 +174,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	}
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 	switch (ext) {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 53f44bee9ebb..b48aa69e35e1 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn)
 	*(int *)rtn = 0;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 
 	int r;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 750835a4ef70..aacf3e35404f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -300,7 +300,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
 {
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 54612d0e79dd..9133f869b070 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -127,7 +127,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 	return -EINVAL;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 805c8e92cf66..46a8c74fd431 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2500,7 +2500,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
 	return r;
 }
 
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a3c83491a791..1fb1bd4593a5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -578,7 +578,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg);
 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
 
-int kvm_dev_ioctl_check_extension(long ext);
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
 
 int kvm_get_dirty_log(struct kvm *kvm,
 			struct kvm_dirty_log *log, int *is_dirty);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3db56912caed..e3315356b69a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2564,7 +2564,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	return r;
 }
 
-static long kvm_dev_ioctl_check_extension_generic(long arg)
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 {
 	switch (arg) {
 	case KVM_CAP_USER_MEMORY:
@@ -2588,7 +2588,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
 	default:
 		break;
 	}
-	return kvm_dev_ioctl_check_extension(arg);
+	return kvm_vm_ioctl_check_extension(kvm, arg);
 }
 
 static long kvm_dev_ioctl(struct file *filp,
@@ -2607,7 +2607,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_dev_ioctl_create_vm(arg);
 		break;
 	case KVM_CHECK_EXTENSION:
-		r = kvm_dev_ioctl_check_extension_generic(arg);
+		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
 		r = -EINVAL;

From c509fab667c6a02b0763a4343c73d134c6a09328 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 14 Jul 2014 18:33:08 +0200
Subject: [PATCH 228/287] KVM: Allow KVM_CHECK_EXTENSION on the vm fd

The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately
on PPC some of the capabilities change depending on the way a VM was created.

So instead we need a way to expose capabilities as VM ioctl, so that we can
see which VM type we're using (HV or PR). To enable this, add the
KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 92b591a4c46b103ebd3fc0d03a084e1efd331253)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 Documentation/virtual/kvm/api.txt |  7 ++--
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/kvm_main.c               | 58 +++++++++++++++++--------------
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 8d135672b69a..257a1f1eecc7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
 
 4.4 KVM_CHECK_EXTENSION
 
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
 Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
 Parameters: extension identifier (KVM_CAP_*)
 Returns: 0 if unsupported; 1 (or some other positive integer) if supported
 
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
 Generally 0 means no and 1 means yes, but some extensions may report
 additional information in the integer return value.
 
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
 
 4.5 KVM_GET_VCPU_MMAP_SIZE
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 521e4c0a08ac..1af686a82703 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -679,6 +679,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_EL1_32BIT 93
 #define KVM_CAP_EXT_EMUL_CPUID 95
 #define KVM_CAP_ARM_PSCI_0_2 102
+#define KVM_CAP_CHECK_EXTENSION_VM 105
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e3315356b69a..1edb15da7acf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2317,6 +2317,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	return 0;
 }
 
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+{
+	switch (arg) {
+	case KVM_CAP_USER_MEMORY:
+	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+	case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
+	case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+	case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQFD_RESAMPLE:
+#endif
+	case KVM_CAP_CHECK_EXTENSION_VM:
+		return 1;
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+	case KVM_CAP_IRQ_ROUTING:
+		return KVM_MAX_IRQ_ROUTES;
+#endif
+	default:
+		break;
+	}
+	return kvm_vm_ioctl_check_extension(kvm, arg);
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2480,6 +2508,9 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_CHECK_EXTENSION:
+		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
@@ -2564,33 +2595,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	return r;
 }
 
-static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
-{
-	switch (arg) {
-	case KVM_CAP_USER_MEMORY:
-	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-	case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
-	case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
-	case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
-	case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQFD_RESAMPLE:
-#endif
-		return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-	case KVM_CAP_IRQ_ROUTING:
-		return KVM_MAX_IRQ_ROUTES;
-#endif
-	default:
-		break;
-	}
-	return kvm_vm_ioctl_check_extension(kvm, arg);
-}
-
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {

From edff6ae9e04008800e518454cf7fa0efe3023252 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 25 Jul 2014 16:29:12 +0100
Subject: [PATCH 229/287] kvm: arm64: vgic: fix hyp panic with 64k pages on
 juno platform

If the physical address of GICV isn't page-aligned, then we end up
creating a stage-2 mapping of the page containing it, which causes us to
map neighbouring memory locations directly into the guest.

As an example, consider a platform with GICV at physical 0x2c02f000
running a 64k-page host kernel. If qemu maps this into the guest at
0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will
map host physical region 0x2c020000 - 0x2c02efff. Accesses to these
physical regions may cause UNPREDICTABLE behaviour, for example, on the
Juno platform this will cause an SError exception to EL3, which brings
down the entire physical CPU resulting in RCU stalls / HYP panics / host
crashing / wasted weeks of debugging.

SBSA recommends that systems alias the 4k GICV across the bounding 64k
region, in which case GICV physical could be described as 0x2c020000 in
the above scenario.

This patch fixes the problem by failing the vgic probe if the physical
base address or the size of GICV aren't page-aligned. Note that this
generated a warning in dmesg about freeing enabled IRQs, so I had to
move the IRQ enabling later in the probe.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Joel Schopp <joel.schopp@amd.com>
Cc: Don Dutile <ddutile@redhat.com>
Acked-by: Peter Maydell <peter.maydell@linaro.org>
Acked-by: Joel Schopp <joel.schopp@amd.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 63afbe7a0ac184ef8485dac4914e87b211b5bfaa)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic-v2.c | 16 ++++++++++++++++
 virt/kvm/arm/vgic.c    |  4 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index d6c9c142f813..01124ef3690a 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -231,6 +231,22 @@ int vgic_v2_probe(struct device_node *vgic_node,
 		ret = -ENXIO;
 		goto out_unmap;
 	}
+
+	if (!PAGE_ALIGNED(vcpu_res.start)) {
+		kvm_err("GICV physical address 0x%llx not page aligned\n",
+			(unsigned long long)vcpu_res.start);
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+
+	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+			(unsigned long long)resource_size(&vcpu_res),
+			PAGE_SIZE);
+		ret = -ENXIO;
+		goto out_unmap;
+	}
+
 	vgic->vcpu_base = vcpu_res.start;
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ede8f6466c95..73eba793b17f 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1587,11 +1587,11 @@ int kvm_vgic_hyp_init(void)
 		goto out_free_irq;
 	}
 
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
 	/* Callback into for arch code for setup */
 	vgic_arch_setup(vgic);
 
+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
 	return 0;
 
 out_free_irq:

From 45104cfeec66c1314a2ae5e4cb436a9ace725e59 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 31 Jul 2014 14:16:39 +0100
Subject: [PATCH 230/287] arm64: KVM: GICv3: move system register access to
 msr_s/mrs_s

Commit 72c583951526 (arm64: gicv3: Allow GICv3 compilation with
older binutils) changed the way we express the GICv3 system registers,
but couldn't change the occurences used by KVM as the code wasn't
merged yet.

Just fix the accessors.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit f4c321eb268e932786c112e0b902ee942d91a336)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/kvm/vgic-v3-switch.S | 130 ++++++++++++++++----------------
 1 file changed, 65 insertions(+), 65 deletions(-)

diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
index 21e68f606a8f..d16046999e06 100644
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -48,11 +48,11 @@
 	dsb	st
 
 	// Save all interesting registers
-	mrs	x4, ICH_HCR_EL2
-	mrs	x5, ICH_VMCR_EL2
-	mrs	x6, ICH_MISR_EL2
-	mrs	x7, ICH_EISR_EL2
-	mrs	x8, ICH_ELSR_EL2
+	mrs_s	x4, ICH_HCR_EL2
+	mrs_s	x5, ICH_VMCR_EL2
+	mrs_s	x6, ICH_MISR_EL2
+	mrs_s	x7, ICH_EISR_EL2
+	mrs_s	x8, ICH_ELSR_EL2
 
 	str	w4, [x3, #VGIC_V3_CPU_HCR]
 	str	w5, [x3, #VGIC_V3_CPU_VMCR]
@@ -60,9 +60,9 @@
 	str	w7, [x3, #VGIC_V3_CPU_EISR]
 	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
 
-	msr	ICH_HCR_EL2, xzr
+	msr_s	ICH_HCR_EL2, xzr
 
-	mrs	x21, ICH_VTR_EL2
+	mrs_s	x21, ICH_VTR_EL2
 	mvn	w22, w21
 	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
 
@@ -71,22 +71,22 @@
 	br	x24
 
 1:
-	mrs	x20, ICH_LR15_EL2
-	mrs	x19, ICH_LR14_EL2
-	mrs	x18, ICH_LR13_EL2
-	mrs	x17, ICH_LR12_EL2
-	mrs	x16, ICH_LR11_EL2
-	mrs	x15, ICH_LR10_EL2
-	mrs	x14, ICH_LR9_EL2
-	mrs	x13, ICH_LR8_EL2
-	mrs	x12, ICH_LR7_EL2
-	mrs	x11, ICH_LR6_EL2
-	mrs	x10, ICH_LR5_EL2
-	mrs	x9, ICH_LR4_EL2
-	mrs	x8, ICH_LR3_EL2
-	mrs	x7, ICH_LR2_EL2
-	mrs	x6, ICH_LR1_EL2
-	mrs	x5, ICH_LR0_EL2
+	mrs_s	x20, ICH_LR15_EL2
+	mrs_s	x19, ICH_LR14_EL2
+	mrs_s	x18, ICH_LR13_EL2
+	mrs_s	x17, ICH_LR12_EL2
+	mrs_s	x16, ICH_LR11_EL2
+	mrs_s	x15, ICH_LR10_EL2
+	mrs_s	x14, ICH_LR9_EL2
+	mrs_s	x13, ICH_LR8_EL2
+	mrs_s	x12, ICH_LR7_EL2
+	mrs_s	x11, ICH_LR6_EL2
+	mrs_s	x10, ICH_LR5_EL2
+	mrs_s	x9, ICH_LR4_EL2
+	mrs_s	x8, ICH_LR3_EL2
+	mrs_s	x7, ICH_LR2_EL2
+	mrs_s	x6, ICH_LR1_EL2
+	mrs_s	x5, ICH_LR0_EL2
 
 	adr	x24, 1f
 	add	x24, x24, x23
@@ -113,34 +113,34 @@
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
-	mrs	x20, ICH_AP0R3_EL2
+	mrs_s	x20, ICH_AP0R3_EL2
 	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	mrs	x19, ICH_AP0R2_EL2
+	mrs_s	x19, ICH_AP0R2_EL2
 	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-6:	mrs	x18, ICH_AP0R1_EL2
+6:	mrs_s	x18, ICH_AP0R1_EL2
 	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-5:	mrs	x17, ICH_AP0R0_EL2
+5:	mrs_s	x17, ICH_AP0R0_EL2
 	str	w17, [x3, #VGIC_V3_CPU_AP0R]
 
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
-	mrs	x20, ICH_AP1R3_EL2
+	mrs_s	x20, ICH_AP1R3_EL2
 	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	mrs	x19, ICH_AP1R2_EL2
+	mrs_s	x19, ICH_AP1R2_EL2
 	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-6:	mrs	x18, ICH_AP1R1_EL2
+6:	mrs_s	x18, ICH_AP1R1_EL2
 	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-5:	mrs	x17, ICH_AP1R0_EL2
+5:	mrs_s	x17, ICH_AP1R0_EL2
 	str	w17, [x3, #VGIC_V3_CPU_AP1R]
 
 	// Restore SRE_EL1 access and re-enable SRE at EL1.
-	mrs	x5, ICC_SRE_EL2
+	mrs_s	x5, ICC_SRE_EL2
 	orr	x5, x5, #ICC_SRE_EL2_ENABLE
-	msr	ICC_SRE_EL2, x5
+	msr_s	ICC_SRE_EL2, x5
 	isb
 	mov	x5, #1
-	msr	ICC_SRE_EL1, x5
+	msr_s	ICC_SRE_EL1, x5
 .endm
 
 /*
@@ -150,7 +150,7 @@
 .macro	restore_vgic_v3_state
 	// Disable SRE_EL1 access. Necessary, otherwise
 	// ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
-	msr	ICC_SRE_EL1, xzr
+	msr_s	ICC_SRE_EL1, xzr
 	isb
 
 	// Compute the address of struct vgic_cpu
@@ -160,34 +160,34 @@
 	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
 	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
 
-	msr	ICH_HCR_EL2, x4
-	msr	ICH_VMCR_EL2, x5
+	msr_s	ICH_HCR_EL2, x4
+	msr_s	ICH_VMCR_EL2, x5
 
-	mrs	x21, ICH_VTR_EL2
+	mrs_s	x21, ICH_VTR_EL2
 
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
 	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	msr	ICH_AP1R3_EL2, x20
+	msr_s	ICH_AP1R3_EL2, x20
 	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-	msr	ICH_AP1R2_EL2, x19
+	msr_s	ICH_AP1R2_EL2, x19
 6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-	msr	ICH_AP1R1_EL2, x18
+	msr_s	ICH_AP1R1_EL2, x18
 5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
-	msr	ICH_AP1R0_EL2, x17
+	msr_s	ICH_AP1R0_EL2, x17
 
 	tbnz	w21, #29, 6f	// 6 bits
 	tbz	w21, #30, 5f	// 5 bits
 				// 7 bits
 	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	msr	ICH_AP0R3_EL2, x20
+	msr_s	ICH_AP0R3_EL2, x20
 	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-	msr	ICH_AP0R2_EL2, x19
+	msr_s	ICH_AP0R2_EL2, x19
 6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-	msr	ICH_AP0R1_EL2, x18
+	msr_s	ICH_AP0R1_EL2, x18
 5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
-	msr	ICH_AP0R0_EL2, x17
+	msr_s	ICH_AP0R0_EL2, x17
 
 	and	w22, w21, #0xf
 	mvn	w22, w21
@@ -220,22 +220,22 @@
 	br	x24
 
 1:
-	msr	ICH_LR15_EL2, x20
-	msr	ICH_LR14_EL2, x19
-	msr	ICH_LR13_EL2, x18
-	msr	ICH_LR12_EL2, x17
-	msr	ICH_LR11_EL2, x16
-	msr	ICH_LR10_EL2, x15
-	msr	ICH_LR9_EL2,  x14
-	msr	ICH_LR8_EL2,  x13
-	msr	ICH_LR7_EL2,  x12
-	msr	ICH_LR6_EL2,  x11
-	msr	ICH_LR5_EL2,  x10
-	msr	ICH_LR4_EL2,   x9
-	msr	ICH_LR3_EL2,   x8
-	msr	ICH_LR2_EL2,   x7
-	msr	ICH_LR1_EL2,   x6
-	msr	ICH_LR0_EL2,   x5
+	msr_s	ICH_LR15_EL2, x20
+	msr_s	ICH_LR14_EL2, x19
+	msr_s	ICH_LR13_EL2, x18
+	msr_s	ICH_LR12_EL2, x17
+	msr_s	ICH_LR11_EL2, x16
+	msr_s	ICH_LR10_EL2, x15
+	msr_s	ICH_LR9_EL2,  x14
+	msr_s	ICH_LR8_EL2,  x13
+	msr_s	ICH_LR7_EL2,  x12
+	msr_s	ICH_LR6_EL2,  x11
+	msr_s	ICH_LR5_EL2,  x10
+	msr_s	ICH_LR4_EL2,   x9
+	msr_s	ICH_LR3_EL2,   x8
+	msr_s	ICH_LR2_EL2,   x7
+	msr_s	ICH_LR1_EL2,   x6
+	msr_s	ICH_LR0_EL2,   x5
 
 	// Ensure that the above will have reached the
 	// (re)distributors. This ensure the guest will read
@@ -244,9 +244,9 @@
 	dsb	sy
 
 	// Prevent the guest from touching the GIC system registers
-	mrs	x5, ICC_SRE_EL2
+	mrs_s	x5, ICC_SRE_EL2
 	and	x5, x5, #~ICC_SRE_EL2_ENABLE
-	msr	ICC_SRE_EL2, x5
+	msr_s	ICC_SRE_EL2, x5
 .endm
 
 ENTRY(__save_vgic_v3_state)
@@ -260,7 +260,7 @@ ENTRY(__restore_vgic_v3_state)
 ENDPROC(__restore_vgic_v3_state)
 
 ENTRY(__vgic_v3_get_ich_vtr_el2)
-	mrs	x0, ICH_VTR_EL2
+	mrs_s	x0, ICH_VTR_EL2
 	ret
 ENDPROC(__vgic_v3_get_ich_vtr_el2)
 

From 099fcd90255453a5976a42c5f9421a6a7ad7ce3d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 31 Jul 2014 11:42:18 +0100
Subject: [PATCH 231/287] KVM: arm64: GICv3: mandate page-aligned GICV region

Just like GICv2 was fixed in 63afbe7a0ac1
(kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform),
mandate the GICV region to be both aligned on a page boundary and
its size to be a multiple of page size.

This prevents a guest from being able to poke at regions where we
have no idea what is sitting there.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit fb3ec67942e92e5713e05b7691b277d0a0c0575d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic-v3.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index f01d44685720..1c2c8eef0599 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -215,6 +215,22 @@ int vgic_v3_probe(struct device_node *vgic_node,
 		ret = -ENXIO;
 		goto out;
 	}
+
+	if (!PAGE_ALIGNED(vcpu_res.start)) {
+		kvm_err("GICV physical address 0x%llx not page aligned\n",
+			(unsigned long long)vcpu_res.start);
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+		kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+			(unsigned long long)resource_size(&vcpu_res),
+			PAGE_SIZE);
+		ret = -ENXIO;
+		goto out;
+	}
+
 	vgic->vcpu_base = vcpu_res.start;
 	vgic->vctrl_base = NULL;
 	vgic->type = VGIC_V3;

From dad44b58644918b98d32ed35f28e919e08b0f896 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 1 Aug 2014 12:00:36 +0100
Subject: [PATCH 232/287] arm64: KVM: fix 64bit CP15 VM access for 32bit guests

Commit f0a3eaff71b8 (ARM64: KVM: fix big endian issue in
access_vm_reg for 32bit guest) changed the way we handle CP15
VM accesses, so that all 64bit accesses are done via vcpu_sys_reg.

This looks like a good idea as it solves indianness issues in an
elegant way, except for one small detail: the register index is
doesn't refer to the same array! We end up corrupting some random
data structure instead.

Fix this by reverting to the original code, except for the introduction
of a vcpu_cp15_64_high macro that deals with the endianness thing.

Tested on Juno with 32bit SMP guests.

Cc: Victor Kamensky <victor.kamensky@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit dedf97e8ff2c7513b1370e36b56e08b6bd0f0290)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_host.h | 6 ++++--
 arch/arm64/kvm/sys_regs.c         | 7 +++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 79812be4f25f..e10c45a578e3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -149,9 +149,11 @@ struct kvm_vcpu_arch {
 #define vcpu_cp15(v,r)		((v)->arch.ctxt.copro[(r)])
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 1)])
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r))
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r) + 1)
 #else
-#define vcpu_cp15_64_low(v,r) ((v)->arch.ctxt.copro[((r) + 0)])
+#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r) + 1)
+#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r))
 #endif
 
 struct kvm_vm_stat {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index a4fd5267c65b..5805e7c4a4dd 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -135,10 +135,13 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 	BUG_ON(!p->is_write);
 
 	val = *vcpu_reg(vcpu, p->Rt);
-	if (!p->is_aarch32 || !p->is_32bit)
+	if (!p->is_aarch32) {
 		vcpu_sys_reg(vcpu, r->reg) = val;
-	else
+	} else {
+		if (!p->is_32bit)
+			vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
 		vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+	}
 
 	return true;
 }

From 74afc9fff6cdb9df6cd6ee43ddd59465f48aed6d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:09 +1000
Subject: [PATCH 233/287] KVM: Don't keep reference to irq routing table in
 irqfd struct

This makes the irqfd code keep a copy of the irq routing table entry
for each irqfd, rather than a reference to the copy in the actual
irq routing table maintained in kvm/virt/irqchip.c.  This will enable
us to change the routing table structure in future, or even not have a
routing table at all on some platforms.

The synchronization that was previously achieved using srcu_dereference
on the read side is now achieved using a seqcount_t structure.  That
ensures that we don't get a halfway-updated copy of the structure if
we read it while another thread is updating it.

We still use srcu_read_lock/unlock around the read side so that when
changing the routing table we can be sure that after calling
synchronize_srcu, nothing will be using the old routing.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 56f89f3629ffd1a21d38c3d0bea23deac0e284ce)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/eventfd.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b51c19ffd8fd..16bf26183225 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -33,6 +33,7 @@
 #include <linux/kernel.h>
 #include <linux/srcu.h>
 #include <linux/slab.h>
+#include <linux/seqlock.h>
 
 #include "iodev.h"
 
@@ -75,7 +76,8 @@ struct _irqfd {
 	struct kvm *kvm;
 	wait_queue_t wait;
 	/* Update side is protected by irqfds.lock */
-	struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+	struct kvm_kernel_irq_routing_entry irq_entry;
+	seqcount_t irq_entry_sc;
 	/* Used for level IRQ fast-path */
 	int gsi;
 	struct work_struct inject;
@@ -223,16 +225,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
 	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
 	unsigned long flags = (unsigned long)key;
-	struct kvm_kernel_irq_routing_entry *irq;
+	struct kvm_kernel_irq_routing_entry irq;
 	struct kvm *kvm = irqfd->kvm;
+	unsigned seq;
 	int idx;
 
 	if (flags & POLLIN) {
 		idx = srcu_read_lock(&kvm->irq_srcu);
-		irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
+		do {
+			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
+			irq = irqfd->irq_entry;
+		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
 		/* An event has been signaled, inject an interrupt */
-		if (irq)
-			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+		if (irq.type == KVM_IRQ_ROUTING_MSI)
+			kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
 		else
 			schedule_work(&irqfd->inject);
@@ -277,18 +283,20 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 {
 	struct kvm_kernel_irq_routing_entry *e;
 
-	if (irqfd->gsi >= irq_rt->nr_rt_entries) {
-		rcu_assign_pointer(irqfd->irq_entry, NULL);
-		return;
-	}
+	write_seqcount_begin(&irqfd->irq_entry_sc);
+
+	irqfd->irq_entry.type = 0;
+	if (irqfd->gsi >= irq_rt->nr_rt_entries)
+		goto out;
 
 	hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
 		/* Only fast-path MSI. */
 		if (e->type == KVM_IRQ_ROUTING_MSI)
-			rcu_assign_pointer(irqfd->irq_entry, e);
-		else
-			rcu_assign_pointer(irqfd->irq_entry, NULL);
+			irqfd->irq_entry = *e;
 	}
+
+ out:
+	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
 static int
@@ -310,6 +318,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	INIT_LIST_HEAD(&irqfd->list);
 	INIT_WORK(&irqfd->inject, irqfd_inject);
 	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+	seqcount_init(&irqfd->irq_entry_sc);
 
 	file = eventfd_fget(args->fd);
 	if (IS_ERR(file)) {
@@ -466,14 +475,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
 		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
 			/*
-			 * This rcu_assign_pointer is needed for when
+			 * This clearing of irq_entry.type is needed for when
 			 * another thread calls kvm_irq_routing_update before
 			 * we flush workqueue below (we synchronize with
 			 * kvm_irq_routing_update using irqfds.lock).
-			 * It is paired with synchronize_srcu done by caller
-			 * of that function.
 			 */
-			rcu_assign_pointer(irqfd->irq_entry, NULL);
+			write_seqcount_begin(&irqfd->irq_entry_sc);
+			irqfd->irq_entry.type = 0;
+			write_seqcount_end(&irqfd->irq_entry_sc);
 			irqfd_deactivate(irqfd);
 		}
 	}

From 28bcfc22334c1214d603e5136b627d02f6eadb5e Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:10 +1000
Subject: [PATCH 234/287] KVM: irqchip: Provide and use accessors for irq
 routing table

This provides accessor functions for the KVM interrupt mappings, in
order to reduce the amount of code that accesses the fields of the
kvm_irq_routing_table struct, and restrict that code to one file,
virt/kvm/irqchip.c.  The new functions are kvm_irq_map_gsi(), which
maps from a global interrupt number to a set of IRQ routing entries,
and kvm_irq_map_chip_pin, which maps from IRQ chip and pin numbers to
a global interrupt number.

This also moves the update of kvm_irq_routing_table::chip[][]
into irqchip.c, out of the various kvm_set_routing_entry
implementations.  That means that none of the kvm_set_routing_entry
implementations need the kvm_irq_routing_table argument anymore,
so this removes it.

This does not change any locking or data lifetime rules.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 8ba918d488caded2c4368b0b922eb905fe3bb101)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/powerpc/kvm/mpic.c  |  4 +---
 include/linux/kvm_host.h |  8 ++++++--
 virt/kvm/eventfd.c       | 10 ++++++----
 virt/kvm/irq_comm.c      | 20 +++++++++----------
 virt/kvm/irqchip.c       | 42 ++++++++++++++++++++++++++++++++--------
 5 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 2861ae9eaae6..b58d61039015 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1822,8 +1822,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 	return 0;
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -1835,7 +1834,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 		e->irqchip.pin = ue->u.irqchip.pin;
 		if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
 			goto out;
-		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1fb1bd4593a5..f91b5cda1d28 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -709,6 +709,11 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
+int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
+		    struct kvm_irq_routing_table *irq_rt, int gsi);
+int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
+			 unsigned irqchip, unsigned pin);
+
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
@@ -898,8 +903,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned flags);
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 16bf26183225..a75227f7d487 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -282,20 +282,22 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 			 struct kvm_irq_routing_table *irq_rt)
 {
 	struct kvm_kernel_irq_routing_entry *e;
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+	int i, n_entries;
+
+	n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
 	irqfd->irq_entry.type = 0;
-	if (irqfd->gsi >= irq_rt->nr_rt_entries)
-		goto out;
 
-	hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
+	e = entries;
+	for (i = 0; i < n_entries; ++i, ++e) {
 		/* Only fast-path MSI. */
 		if (e->type == KVM_IRQ_ROUTING_MSI)
 			irqfd->irq_entry = *e;
 	}
 
- out:
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a228ee82bad2..175844593243 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,6 +160,7 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
  */
 int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 {
+	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
@@ -177,14 +178,13 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
-			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
-				ret = kvm_set_msi_inatomic(e, kvm);
-			else
-				ret = -EWOULDBLOCK;
-			break;
-		}
+	if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) {
+		e = &entries[0];
+		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+			ret = kvm_set_msi_inatomic(e, kvm);
+		else
+			ret = -EWOULDBLOCK;
+	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
@@ -272,8 +272,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
-			  struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
 	int r = -EINVAL;
@@ -304,7 +303,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
 		e->irqchip.pin = ue->u.irqchip.pin + delta;
 		if (e->irqchip.pin >= max_pin)
 			goto out;
-		rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
 		break;
 	case KVM_IRQ_ROUTING_MSI:
 		e->set = kvm_set_msi;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index b43c275775cd..f4648dd94888 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,13 +31,37 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
+int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
+		    struct kvm_irq_routing_table *irq_rt, int gsi)
+{
+	struct kvm_kernel_irq_routing_entry *e;
+	int n = 0;
+
+	if (gsi < irq_rt->nr_rt_entries) {
+		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+			entries[n] = *e;
+			++n;
+		}
+	}
+
+	return n;
+}
+
+int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
+			 unsigned irqchip, unsigned pin)
+{
+	return irq_rt->chip[irqchip][pin];
+}
+
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -54,13 +78,15 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -115,8 +141,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
-	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0, idx;
+	struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
+	int ret = -1, i, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -127,9 +153,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (irq < irq_rt->nr_rt_entries)
-		hlist_for_each_entry(e, &irq_rt->map[irq], link)
-			irq_set[i++] = *e;
+	i = kvm_irq_map_gsi(irq_set, irq_rt, irq);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
@@ -171,9 +195,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 
 	e->gsi = ue->gsi;
 	e->type = ue->type;
-	r = kvm_set_routing_entry(rt, e, ue);
+	r = kvm_set_routing_entry(e, ue);
 	if (r)
 		goto out;
+	if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
+		rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
 
 	hlist_add_head(&e->link, &rt->map[e->gsi]);
 	r = 0;

From 7563524573fb8d7e723cec38329edf1b3968593c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:11 +1000
Subject: [PATCH 235/287] KVM: Move all accesses to kvm::irq_routing into
 irqchip.c

Now that struct _irqfd does not keep a reference to storage pointed
to by the irq_routing field of struct kvm, we can move the statement
that updates it out from under the irqfds.lock and put it in
kvm_set_irq_routing() instead.  That means we then have to take a
srcu_read_lock on kvm->irq_srcu around the irqfd_update call in
kvm_irqfd_assign(), since holding the kvm->irqfds.lock no longer
ensures that that the routing can't change.

Combined with changing kvm_irq_map_gsi() and kvm_irq_map_chip_pin()
to take a struct kvm * argument instead of the pointer to the routing
table, this allows us to to move all references to kvm->irq_routing
into irqchip.c.  That in turn allows us to move the definition of the
kvm_irq_routing_table struct into irqchip.c as well.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9957c86d659a4d5a2bed25ccbd3bfc9c3f25e658)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h | 35 +++++++----------------------------
 virt/kvm/eventfd.c       | 22 +++++++++-------------
 virt/kvm/irq_comm.c      |  6 ++----
 virt/kvm/irqchip.c       | 39 +++++++++++++++++++++++++--------------
 4 files changed, 43 insertions(+), 59 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f91b5cda1d28..65b6b8d793b5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -301,24 +301,7 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-
-struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-	struct kvm_kernel_irq_routing_entry *rt_entries;
-	u32 nr_rt_entries;
-	/*
-	 * Array indexed by gsi. Each entry contains list of irq chips
-	 * the gsi is connected to.
-	 */
-	struct hlist_head map[0];
-};
-
-#else
-
-struct kvm_irq_routing_table {};
-
-#endif
+struct kvm_irq_routing_table;
 
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
@@ -377,8 +360,7 @@ struct kvm {
 	struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	/*
-	 * Update side is protected by irq_lock and,
-	 * if configured, irqfds.lock.
+	 * Update side is protected by irq_lock.
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
@@ -709,10 +691,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask);
 
-int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
-		    struct kvm_irq_routing_table *irq_rt, int gsi);
-int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
-			 unsigned irqchip, unsigned pin);
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi);
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
 
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status);
@@ -923,7 +904,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
-void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
+void kvm_irq_routing_update(struct kvm *);
 #else
 static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
@@ -945,10 +926,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
-static inline void kvm_irq_routing_update(struct kvm *kvm,
-					  struct kvm_irq_routing_table *irq_rt)
+static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
-	rcu_assign_pointer(kvm->irq_routing, irq_rt);
 }
 #endif
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a75227f7d487..71133644f465 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -278,14 +278,13 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 }
 
 /* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
-			 struct kvm_irq_routing_table *irq_rt)
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	int i, n_entries;
 
-	n_entries = kvm_irq_map_gsi(entries, irq_rt, irqfd->gsi);
+	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
@@ -304,12 +303,12 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
 	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 	int ret;
 	unsigned int events;
+	int idx;
 
 	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
 	if (!irqfd)
@@ -403,9 +402,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		goto fail;
 	}
 
-	irq_rt = rcu_dereference_protected(kvm->irq_routing,
-					   lockdep_is_held(&kvm->irqfds.lock));
-	irqfd_update(kvm, irqfd, irq_rt);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irqfd_update(kvm, irqfd);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	events = file->f_op->poll(file, &irqfd->pt);
 
@@ -539,20 +538,17 @@ kvm_irqfd_release(struct kvm *kvm)
 }
 
 /*
- * Change irq_routing and irqfd.
+ * Take note of a change in irq routing.
  * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
-void kvm_irq_routing_update(struct kvm *kvm,
-			    struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm)
 {
 	struct _irqfd *irqfd;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
-	rcu_assign_pointer(kvm->irq_routing, irq_rt);
-
 	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
-		irqfd_update(kvm, irqfd, irq_rt);
+		irqfd_update(kvm, irqfd);
 
 	spin_unlock_irq(&kvm->irqfds.lock);
 }
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 175844593243..963b8995a9e8 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,7 +163,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
-	struct kvm_irq_routing_table *irq_rt;
 	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -177,8 +176,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * which is limited to 1:1 GSI mapping.
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (kvm_irq_map_gsi(entries, irq_rt, irq) > 0) {
+	if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
 		e = &entries[0];
 		if (likely(e->type == KVM_IRQ_ROUTING_MSI))
 			ret = kvm_set_msi_inatomic(e, kvm);
@@ -264,7 +262,7 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 	int idx, gsi;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index f4648dd94888..04faac50cef5 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,12 +31,26 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
-int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
-		    struct kvm_irq_routing_table *irq_rt, int gsi)
+struct kvm_irq_routing_table {
+	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+	struct kvm_kernel_irq_routing_entry *rt_entries;
+	u32 nr_rt_entries;
+	/*
+	 * Array indexed by gsi. Each entry contains list of irq chips
+	 * the gsi is connected to.
+	 */
+	struct hlist_head map[0];
+};
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
+	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_kernel_irq_routing_entry *e;
 	int n = 0;
 
+	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+					lockdep_is_held(&kvm->irq_lock));
 	if (gsi < irq_rt->nr_rt_entries) {
 		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
 			entries[n] = *e;
@@ -47,21 +61,21 @@ int kvm_irq_map_gsi(struct kvm_kernel_irq_routing_entry *entries,
 	return n;
 }
 
-int kvm_irq_map_chip_pin(struct kvm_irq_routing_table *irq_rt,
-			 unsigned irqchip, unsigned pin)
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
+	struct kvm_irq_routing_table *irq_rt;
+
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	return irq_rt->chip[irqchip][pin];
 }
 
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -78,15 +92,13 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
-	struct kvm_irq_routing_table *irq_rt;
 	struct kvm_irq_ack_notifier *kian;
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(irq_rt, irqchip, pin);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
@@ -143,7 +155,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 {
 	struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
 	int ret = -1, i, idx;
-	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -152,8 +163,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * writes to the unused one.
 	 */
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	i = kvm_irq_map_gsi(irq_set, irq_rt, irq);
+	i = kvm_irq_map_gsi(kvm, irq_set, irq);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
@@ -250,7 +260,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 	mutex_lock(&kvm->irq_lock);
 	old = kvm->irq_routing;
-	kvm_irq_routing_update(kvm, new);
+	rcu_assign_pointer(kvm->irq_routing, new);
+	kvm_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
 	synchronize_srcu_expedited(&kvm->irq_srcu);

From c905880e253b8e2f77db26dcb647a1a3bb359b11 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:12 +1000
Subject: [PATCH 236/287] KVM: Move irq notifier implementation into eventfd.c

This moves the functions kvm_irq_has_notifier(), kvm_notify_acked_irq(),
kvm_register_irq_ack_notifier() and kvm_unregister_irq_ack_notifier()
from irqchip.c to eventfd.c.  The reason for doing this is that those
functions are used in connection with IRQFDs, which are implemented in
eventfd.c.  In future we will want to use IRQFDs on platforms that
don't implement the GSI routing implemented in irqchip.c, so we won't
be compiling in irqchip.c, but we still need the irq notifiers.  The
implementation is unchanged.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e4d57e1ee1ab59f0cef0272800ac6c52e0ec814a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/eventfd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/irqchip.c | 61 --------------------------------------------
 2 files changed, 63 insertions(+), 61 deletions(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 71133644f465..d2b1653c97dd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -34,7 +34,9 @@
 #include <linux/srcu.h>
 #include <linux/slab.h>
 #include <linux/seqlock.h>
+#include <trace/events/kvm.h>
 
+#include "irq.h"
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
@@ -830,3 +832,64 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return kvm_assign_ioeventfd(kvm, args);
 }
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				srcu_read_unlock(&kvm->irq_srcu, idx);
+				return true;
+			}
+
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 04faac50cef5..7f256f31df10 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -69,67 +69,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	return irq_rt->chip[irqchip][pin];
 }
 
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				srcu_read_unlock(&kvm->irq_srcu, idx);
-				return true;
-			}
-
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
-
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	struct kvm_kernel_irq_routing_entry route;

From f65b953a7232ae8bb497b226363629f9e49b4792 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 30 Jun 2014 20:51:13 +1000
Subject: [PATCH 237/287] KVM: Give IRQFD its own separate enabling Kconfig
 option

Currently, the IRQFD code is conditional on CONFIG_HAVE_KVM_IRQ_ROUTING.
So that we can have the IRQFD code compiled in without having the
IRQ routing code, this creates a new CONFIG_HAVE_KVM_IRQFD, makes
the IRQFD code conditional on it instead of CONFIG_HAVE_KVM_IRQ_ROUTING,
and makes all the platforms that currently select HAVE_KVM_IRQ_ROUTING
also select HAVE_KVM_IRQFD.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 297e21053a52f060944e9f0de4c64fad9bcd72fc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/ia64/kvm/Kconfig    | 1 +
 arch/powerpc/kvm/Kconfig | 1 +
 arch/x86/kvm/Kconfig     | 1 +
 include/linux/kvm_host.h | 8 ++++----
 virt/kvm/Kconfig         | 3 +++
 virt/kvm/eventfd.c       | 6 +++---
 virt/kvm/kvm_main.c      | 2 +-
 7 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 990b86420cc6..3d50ea955c4c 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -25,6 +25,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select KVM_APIC_ARCHITECTURE
 	select KVM_MMIO
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..60019a6fd6bb 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -155,6 +155,7 @@ config KVM_MPIC
 	bool "KVM in-kernel MPIC emulation"
 	depends on KVM && E500
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_MSI
 	help
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db2b832..bdccfb62aa0d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
 	select MMU_NOTIFIER
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_EVENTFD
 	select KVM_APIC_ARCHITECTURE
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 65b6b8d793b5..35319fe693f8 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -413,7 +413,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd_init(void);
 void kvm_irqfd_exit(void);
 #else
@@ -888,20 +888,20 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-
 #else
 
 static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #endif
 
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
 void kvm_irqfd_release(struct kvm *kvm);
 void kvm_irq_routing_update(struct kvm *);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 13f2d19793e3..fc0c5e603eb4 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
 config HAVE_KVM_IRQCHIP
        bool
 
+config HAVE_KVM_IRQFD
+       bool
+
 config HAVE_KVM_IRQ_ROUTING
        bool
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index d2b1653c97dd..8be5b6545770 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -39,7 +39,7 @@
 #include "irq.h"
 #include "iodev.h"
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 /*
  * --------------------------------------------------------------------
  * irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -450,7 +450,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 void
 kvm_eventfd_init(struct kvm *kvm)
 {
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
 	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -459,7 +459,7 @@ kvm_eventfd_init(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 /*
  * shutdown any irqfd's that match fd+gsi
  */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1edb15da7acf..848a6afab165 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2330,7 +2330,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #ifdef CONFIG_HAVE_KVM_MSI
 	case KVM_CAP_SIGNAL_MSI:
 #endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	case KVM_CAP_IRQFD_RESAMPLE:
 #endif
 	case KVM_CAP_CHECK_EXTENSION_VM:

From 57f984d2c64c8c1e597d021567a379f4f3920b35 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 6 Aug 2014 14:24:45 +0200
Subject: [PATCH 238/287] KVM: Move more code under CONFIG_HAVE_KVM_IRQFD

Commits e4d57e1ee1ab (KVM: Move irq notifier implementation into
eventfd.c, 2014-06-30) included the irq notifier code unconditionally
in eventfd.c, while it was under CONFIG_HAVE_KVM_IRQCHIP before.

Similarly, commit 297e21053a52 (KVM: Give IRQFD its own separate enabling
Kconfig option, 2014-06-30) moved code from CONFIG_HAVE_IRQ_ROUTING
to CONFIG_HAVE_KVM_IRQFD but forgot to move the pieces that used to be
under CONFIG_HAVE_KVM_IRQCHIP.

Together, this broke compilation without CONFIG_KVM_XICS.  Fix by adding
or changing the #ifdefs so that they point at CONFIG_HAVE_KVM_IRQFD.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c77dcacb397519b6ade8f08201a4a90a7f4f751e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h   |   2 +
 include/trace/events/kvm.h |   8 +--
 virt/kvm/eventfd.c         | 122 ++++++++++++++++++-------------------
 virt/kvm/kvm_main.c        |   2 +
 4 files changed, 69 insertions(+), 65 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 35319fe693f8..75d911ca47bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -364,6 +364,8 @@ struct kvm {
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
 	struct hlist_head mask_notifier_list;
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	struct hlist_head irq_ack_notifier_list;
 #endif
 
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 131a0bda7aec..908925ace776 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
 		  __entry->errno < 0 ? -__entry->errno : __entry->reason)
 );
 
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
 TRACE_EVENT(kvm_set_irq,
 	TP_PROTO(unsigned int gsi, int level, int irq_source_id),
 	TP_ARGS(gsi, level, irq_source_id),
@@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq,
 	TP_printk("gsi %u level %d source %d",
 		  __entry->gsi, __entry->level, __entry->irq_source_id)
 );
-#endif
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
 
 #if defined(__KVM_HAVE_IOAPIC)
 #define kvm_deliver_mode		\
@@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq,
 
 #endif /* defined(__KVM_HAVE_IOAPIC) */
 
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
 
 TRACE_EVENT(kvm_ack_irq,
 	TP_PROTO(unsigned int irqchip, unsigned int pin),
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq,
 #endif
 );
 
-#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
 
 
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 8be5b6545770..67563284f7b9 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -445,6 +445,67 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	kfree(irqfd);
 	return ret;
 }
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi) {
+				srcu_read_unlock(&kvm->irq_srcu, idx);
+				return true;
+			}
+
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+	struct kvm_irq_ack_notifier *kian;
+	int gsi, idx;
+
+	trace_kvm_ack_irq(irqchip, pin);
+
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+	if (gsi != -1)
+		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+					 link)
+			if (kian->gsi == gsi)
+				kian->irq_acked(kian);
+	srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+				   struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+	mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+				    struct kvm_irq_ack_notifier *kian)
+{
+	mutex_lock(&kvm->irq_lock);
+	hlist_del_init_rcu(&kian->link);
+	mutex_unlock(&kvm->irq_lock);
+	synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+	kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
 #endif
 
 void
@@ -832,64 +893,3 @@ kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return kvm_assign_ioeventfd(kvm, args);
 }
-
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi) {
-				srcu_read_unlock(&kvm->irq_srcu, idx);
-				return true;
-			}
-
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-
-	return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
-	struct kvm_irq_ack_notifier *kian;
-	int gsi, idx;
-
-	trace_kvm_ack_irq(irqchip, pin);
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
-	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-				   struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
-	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-				    struct kvm_irq_ack_notifier *kian)
-{
-	mutex_lock(&kvm->irq_lock);
-	hlist_del_init_rcu(&kian->link);
-	mutex_unlock(&kvm->irq_lock);
-	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
-	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
-}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 848a6afab165..50f947301fa7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -464,6 +464,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 

From 7fdbe25673a0bcd4c38968ec2cd048a21f1fbb93 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 19 Aug 2014 16:45:56 +0200
Subject: [PATCH 239/287] KVM: avoid unnecessary synchronize_rcu

We dont have to wait for a grace period if there is no oldpid that
we are going to free. putpid also checks for NULL, so this patch
only fences synchronize_rcu.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 7103f60de8bed21a0ad5d15d2ad5b7a333dda201)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 50f947301fa7..aa5057e7a1b9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -129,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 		struct pid *oldpid = vcpu->pid;
 		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 		rcu_assign_pointer(vcpu->pid, newpid);
-		synchronize_rcu();
+		if (oldpid)
+			synchronize_rcu();
 		put_pid(oldpid);
 	}
 	cpu = get_cpu();

From ae38cfa961f9605be43060001ccc41d39427cb64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 21 Aug 2014 18:08:05 +0200
Subject: [PATCH 240/287] KVM: add kvm_arch_sched_in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce preempt notifiers for architecture specific code.
Advantage over creating a new notifier in every arch is slightly simpler
code and guaranteed call order with respect to kvm_sched_in.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit e790d9ef6405633b007339d746b709aed43a928d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c         | 4 ++++
 arch/powerpc/kvm/powerpc.c | 4 ++++
 arch/s390/kvm/kvm-s390.c   | 4 ++++
 arch/x86/kvm/x86.c         | 4 ++++
 include/linux/kvm_host.h   | 2 ++
 virt/kvm/kvm_main.c        | 2 ++
 6 files changed, 20 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 0e3d3dc8eea2..d55786c579bf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index aacf3e35404f..e4c719d49e16 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -533,6 +533,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_BOOKE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9133f869b070..46392afd043d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -325,6 +325,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	/* Nothing todo */
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_regs(&vcpu->arch.host_fpregs);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 46a8c74fd431..a348daad61fd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6809,6 +6809,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	if (type)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 75d911ca47bd..4c12f314aab2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -600,6 +600,8 @@ void kvm_arch_exit(void);
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aa5057e7a1b9..2659da911d99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3063,6 +3063,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 	if (vcpu->preempted)
 		vcpu->preempted = false;
 
+	kvm_arch_sched_in(vcpu, cpu);
+
 	kvm_arch_vcpu_load(vcpu, cpu);
 }
 

From 7ec68cffdc4807a95ae1db300bf857084f0c94da Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Aug 2014 12:15:00 +0200
Subject: [PATCH 241/287] KVM: Introduce gfn_to_hva_memslot_prot

To support read-only memory regions on arm and arm64, we have a need to
resolve a gfn to an hva given a pointer to a memslot to avoid looping
through the memslots twice and to reuse the hva error checking of
gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and
refactor gfn_to_hva_prot() to use this function.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4c12f314aab2..59d50379c2e4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -504,6 +504,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+				      bool *writable);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2659da911d99..84cee09da140 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
  * If writable is set to false, the hva returned by this function is only
  * allowed to be read.
  */
-unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+				      gfn_t gfn, bool *writable)
 {
-	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
 	unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
 
 	if (!kvm_is_error_hva(hva) && writable)
@@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 	return hva;
 }
 
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+	return gfn_to_hva_memslot_prot(slot, gfn, writable);
+}
+
 static int kvm_read_hva(void *data, void __user *hva, int len)
 {
 	return __copy_from_user(data, hva, len);

From 7e7ef8405025cd7af730fce40ace2a9c77ee9970 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Aug 2014 12:18:04 +0200
Subject: [PATCH 242/287] arm/arm64: KVM: Support KVM_CAP_READONLY_MEM

When userspace loads code and data in a read-only memory regions, KVM
needs to be able to handle this on arm and arm64.  Specifically this is
used when running code directly from a read-only flash device; the
common scenario is a UEFI blob loaded with the -bios option in QEMU.

Note that the MMIO exit on writes to a read-only memory is ABI and can
be used to emulate block-erase style flash devices.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 98047888bb9fd57734028c44ec17413ddd623958)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/uapi/asm/kvm.h   |  1 +
 arch/arm/kvm/arm.c                |  1 +
 arch/arm/kvm/mmu.c                | 22 ++++++++--------------
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 4 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index e6ebdd3471e5..51257fda254b 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d55786c579bf..35dc889df3d2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -188,6 +188,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_ARM_PSCI:
 	case KVM_CAP_ARM_PSCI_0_2:
+	case KVM_CAP_READONLY_MEM:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 16e7994bf347..62f5642153f9 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -747,14 +747,13 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-			  struct kvm_memory_slot *memslot,
+			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
 {
 	int ret;
 	bool write_fault, writable, hugetlb = false, force_pte = false;
 	unsigned long mmu_seq;
 	gfn_t gfn = fault_ipa >> PAGE_SHIFT;
-	unsigned long hva = gfn_to_hva(vcpu->kvm, gfn);
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
@@ -863,7 +862,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	unsigned long fault_status;
 	phys_addr_t fault_ipa;
 	struct kvm_memory_slot *memslot;
-	bool is_iabt;
+	unsigned long hva;
+	bool is_iabt, write_fault, writable;
 	gfn_t gfn;
 	int ret, idx;
 
@@ -884,7 +884,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	gfn = fault_ipa >> PAGE_SHIFT;
-	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
 		if (is_iabt) {
 			/* Prefetch Abort on I/O address */
 			kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -892,13 +895,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			goto out_unlock;
 		}
 
-		if (fault_status != FSC_FAULT) {
-			kvm_err("Unsupported fault status on io memory: %#lx\n",
-				fault_status);
-			ret = -EFAULT;
-			goto out_unlock;
-		}
-
 		/*
 		 * The IPA is reported as [MAX:12], so we need to
 		 * complement it with the bottom 12 bits from the
@@ -910,9 +906,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		goto out_unlock;
 	}
 
-	memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
-	ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status);
+	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 	if (ret == 0)
 		ret = 1;
 out_unlock:
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index e633ff8cdec8..f4ec5a674d05 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -37,6 +37,7 @@
 
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
 
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))

From 276359920df8032e689497456c0a3c6dcc1b5100 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:20 +0100
Subject: [PATCH 243/287] KVM: ARM/arm64: fix non-const declaration of function
 returning const

Sparse kicks up about a type mismatch for kvm_target_cpu:

arch/arm64/kvm/guest.c:271:25: error: symbol 'kvm_target_cpu' redeclared with different type (originally declared at ./arch/arm64/include/asm/kvm_host.h:45) - different modifiers

so fix this by adding the missing const attribute to the function
declaration.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 6951e48bff0b55d2a8e825a953fc1f8e3a34bf1c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   | 2 +-
 arch/arm64/include/asm/kvm_host.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 6dfb404f6c46..fcb12a6f7db5 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -42,7 +42,7 @@
 
 struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e10c45a578e3..44094e559848 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -42,7 +42,7 @@
 #define KVM_VCPU_MAX_FEATURES 3
 
 struct kvm_vcpu;
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(long ext);
 

From 0251cb8ae768874a988c39a908fbcc8dfd82429a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:21 +0100
Subject: [PATCH 244/287] KVM: ARM/arm64: fix broken __percpu annotation

Running sparse results in a bunch of noisy address space mismatches
thanks to the broken __percpu annotation on kvm_get_running_vcpus.

This function returns a pcpu pointer to a pointer, not a pointer to a
pcpu pointer. This patch fixes the annotation, which kills the warnings
from sparse.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 4000be423cb01a8d09de878bb8184511c49d4238)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c                | 2 +-
 arch/arm64/include/asm/kvm_host.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 35dc889df3d2..79268a12a49e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -82,7 +82,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
 /**
  * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
  */
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 {
 	return &kvm_arm_running_vcpu;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 44094e559848..50431d36732b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -193,7 +193,7 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
 }
 
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
 

From 563d813862bf2c44e1e9f0d59facf8e0ce303355 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:22 +0100
Subject: [PATCH 245/287] KVM: ARM/arm64: avoid returning negative error code
 as bool

is_valid_cache returns true if the specified cache is valid.
Unfortunately, if the parameter passed it out of range, we return
-ENOENT, which ends up as true leading to potential hilarity.

This patch returns false on the failure path instead.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 18d457661fb9fa69352822ab98d39331c3d0e571)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/coproc.c     | 2 +-
 arch/arm64/kvm/sys_regs.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 37a0fe1bb9bb..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val)
 	u32 level, ctype;
 
 	if (val >= CSSELR_MAX)
-		return -ENOENT;
+		return false;
 
 	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
         level = (val >> 1);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5805e7c4a4dd..4cc3b719208e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val)
 	u32 level, ctype;
 
 	if (val >= CSSELR_MAX)
-		return -ENOENT;
+		return false;
 
 	/* Bottom bit is Instruction or Data bit.  Next 3 bits are level. */
 	level = (val >> 1);

From 1453b5c1052996b983f6d9ae043dec64e089c0c8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:23 +0100
Subject: [PATCH 246/287] KVM: ARM/arm64: return -EFAULT if copy_from_user
 fails in set_timer_reg

We currently return the number of bytes not copied if set_timer_reg
fails, which is almost certainly not what userspace would like.

This patch returns -EFAULT instead.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit bd218bce92d3868ba4fe5e9e3eb8199d2aa614af)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/guest.c   | 2 +-
 arch/arm64/kvm/guest.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 813e49258690..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
 	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
 	if (ret != 0)
-		return ret;
+		return -EFAULT;
 
 	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
 }
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 8d1ec2887a26..76794692c20b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 
 	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
 	if (ret != 0)
-		return ret;
+		return -EFAULT;
 
 	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
 }

From 02b1b15d1265067d246eb8017c27c75d2fac5dae Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:24 +0100
Subject: [PATCH 247/287] KVM: vgic: return int instead of bool when checking
 I/O ranges

vgic_ioaddr_overlap claims to return a bool, but in reality it returns
an int. Shut sparse up by fixing the type signature.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 1fa451bcc67fa921a04c5fac8dbcde7844d54512)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba793b17f..d1cfe672b9d7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1690,7 +1690,7 @@ int kvm_vgic_create(struct kvm *kvm)
 	return ret;
 }
 
-static bool vgic_ioaddr_overlap(struct kvm *kvm)
+static int vgic_ioaddr_overlap(struct kvm *kvm)
 {
 	phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
 	phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;

From 8ec715912ec282c0c560407fe72ae80b14c8d320 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 26 Aug 2014 15:13:25 +0100
Subject: [PATCH 248/287] KVM: vgic: declare probe function pointer as const

We extract the vgic probe function from the of_device_id data pointer,
which is const. Kill the sparse warning by ensuring that the local
function pointer is also marked as const.

Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit de56fb1923ca11f428bf557870e0faa99f38762e)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index d1cfe672b9d7..efe6eee2e7eb 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1557,8 +1557,8 @@ static const struct of_device_id vgic_ids[] = {
 int kvm_vgic_hyp_init(void)
 {
 	const struct of_device_id *matched_id;
-	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-			  const struct vgic_params **);
+	const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+				const struct vgic_params **);
 	struct device_node *vgic_node;
 	int ret;
 

From d3b49fbe5764b06372df1dea9c9bea5b8373f49c Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 26 Aug 2014 14:00:37 +0200
Subject: [PATCH 249/287] KVM: Unconditionally export KVM_CAP_READONLY_MEM

The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that
userspace can, at run-time, determine if a feature is supported or not.
This allows KVM to being supporting a new feature with a new kernel
version without any need to update user space.  Unfortunately, since the
definition of KVM_CAP_READONLY_MEM was guarded by #ifdef
__KVM_HAVE_READONLY_MEM, such discovery still required a user space
update.

Therefore, unconditionally export KVM_CAP_READONLY_MEM and change the
in-kernel conditional to rely on __KVM_HAVE_READONLY_MEM.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0f8a4de3e088797576ac76200b634b802e5c7781)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/kvm.h | 2 --
 virt/kvm/kvm_main.c      | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1af686a82703..023ec6132966 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -662,9 +662,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
-#ifdef __KVM_HAVE_READONLY_MEM
 #define KVM_CAP_READONLY_MEM 81
-#endif
 #define KVM_CAP_IRQFD_RESAMPLE 82
 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83
 #define KVM_CAP_PPC_HTAB_FD 84
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 84cee09da140..0cb02c749622 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -709,7 +709,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 {
 	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
 
-#ifdef KVM_CAP_READONLY_MEM
+#ifdef __KVM_HAVE_READONLY_MEM
 	valid_flags |= KVM_MEM_READONLY;
 #endif
 

From 177e51e89403b6843d068a1dae873650053b32a4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 26 Aug 2014 14:00:38 +0200
Subject: [PATCH 250/287] KVM: Unconditionally export KVM_CAP_USER_NMI

The idea between capabilities and the KVM_CHECK_EXTENSION ioctl is that
userspace can, at run-time, determine if a feature is supported or not.
This allows KVM to being supporting a new feature with a new kernel
version without any need to update user space.  Unfortunately, since the
definition of KVM_CAP_USER_NMI was guarded by #ifdef
__KVM_HAVE_USER_NMI, such discovery still required a user space update.

Therefore, unconditionally export KVM_CAP_USER_NMI and change the
the typo in the comment for the IOCTL number definition as well.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 44b5ce73c99c389817be71b9161bceb197d40ecb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/uapi/linux/kvm.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 023ec6132966..bbc1f8a09eb8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -578,9 +578,7 @@ struct kvm_ppc_smmu_info {
 #endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
-#endif
 #ifdef __KVM_HAVE_GUEST_DEBUG
 #define KVM_CAP_SET_GUEST_DEBUG 23
 #endif
@@ -995,7 +993,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_S390_INITIAL_RESET    _IO(KVMIO,   0x97)
 #define KVM_GET_MP_STATE          _IOR(KVMIO,  0x98, struct kvm_mp_state)
 #define KVM_SET_MP_STATE          _IOW(KVMIO,  0x99, struct kvm_mp_state)
-/* Available with KVM_CAP_NMI */
+/* Available with KVM_CAP_USER_NMI */
 #define KVM_NMI                   _IO(KVMIO,   0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG       _IOW(KVMIO,  0x9b, struct kvm_guest_debug)

From 48bc31d532d18a5e64c5ef1f20be161d0b834d11 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 29 Aug 2014 14:01:17 +0200
Subject: [PATCH 251/287] KVM: forward declare structs in kvm_types.h

Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid
"'struct foo' declared inside parameter list" warnings (and consequent
breakage due to conflicting types).

Move them from individual files to a generic place in linux/kvm_types.h.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 656473003bc7e056c3bbd4a4d9832dad01e86f76)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h     |  7 ++-----
 arch/arm64/include/asm/kvm_host.h   |  6 ++----
 arch/ia64/include/asm/kvm_host.h    |  3 ---
 arch/mips/include/asm/kvm_host.h    |  5 -----
 arch/powerpc/include/asm/kvm_host.h |  5 -----
 arch/s390/include/asm/kvm_host.h    |  3 +++
 arch/x86/include/asm/kvm_host.h     |  4 ----
 include/linux/kvm_host.h            |  6 ------
 include/linux/kvm_types.h           | 14 ++++++++++++++
 9 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index fcb12a6f7db5..0bc5295bbfdf 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,6 +19,8 @@
 #ifndef __ARM_KVM_HOST_H__
 #define __ARM_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -40,7 +42,6 @@
 
 #include <kvm/arm_vgic.h>
 
-struct kvm_vcpu;
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
@@ -149,20 +150,17 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 u64 kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
@@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
 
 int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
 unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
 int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 50431d36732b..ac99093d004e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
 #ifndef __ARM64_KVM_HOST_H__
 #define __ARM64_KVM_HOST_H__
 
+#include <linux/types.h>
+#include <linux/kvm_types.h>
 #include <asm/kvm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
@@ -41,7 +43,6 @@
 
 #define KVM_VCPU_MAX_FEATURES 3
 
-struct kvm_vcpu;
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 int kvm_arch_dev_ioctl_check_extension(long ext);
@@ -164,18 +165,15 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };
 
-struct kvm_vcpu_init;
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
 			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 989dd3fe8de1..65088aa016c0 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -238,9 +238,6 @@ struct kvm_vm_data {
 #define KVM_NR_PAGE_SIZES	1
 #define KVM_PAGES_PER_HPAGE(x)	1
 
-struct kvm;
-struct kvm_vcpu;
-
 struct kvm_mmio_req {
 	uint64_t addr;          /*  physical address		*/
 	uint64_t size;          /*  size in bytes		*/
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4d6fa0bf1305..d56ee50d6885 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -71,11 +71,6 @@
 #define CAUSEB_DC       27
 #define CAUSEF_DC       (_ULCAST_(1)   << 27)
 
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-struct kvm_interrupt;
-
 extern atomic_t kvm_mips_instance;
 extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
 extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cde7cb6..0a3238026fd7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,7 +53,6 @@
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
-struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			       unsigned long start, unsigned long end);
@@ -81,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 /* Physical Address Mask - allowed range of real mode RAM access */
 #define KVM_PAM			0x0fffffffffffffffULL
 
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-
 struct lppaca;
 struct slb_shadow;
 struct dtl_entry;
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 16bd5d169cdb..42913475fc6c 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
 
 #ifndef ASM_KVM_HOST_H
 #define ASM_KVM_HOST_H
+
+#include <linux/types.h>
 #include <linux/hrtimer.h>
 #include <linux/interrupt.h>
+#include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
 #include <asm/debug.h>
 #include <asm/cpu.h>
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53db582487c9..bd6f3529453d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -103,10 +103,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 
 #define ASYNC_PF_PER_VCPU 64
 
-struct kvm_vcpu;
-struct kvm;
-struct kvm_async_pf;
-
 enum kvm_reg {
 	VCPU_REGS_RAX = 0,
 	VCPU_REGS_RCX = 1,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 59d50379c2e4..b8cb3c2d893e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -129,8 +129,6 @@ static inline bool is_error_page(struct page *page)
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 
-struct kvm;
-struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
 extern spinlock_t kvm_lock;
@@ -301,8 +299,6 @@ struct kvm_kernel_irq_routing_entry {
 	struct hlist_node link;
 };
 
-struct kvm_irq_routing_table;
-
 #ifndef KVM_PRIVATE_MEM_SLOTS
 #define KVM_PRIVATE_MEM_SLOTS 0
 #endif
@@ -994,8 +990,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 
 extern bool kvm_rebooting;
 
-struct kvm_device_ops;
-
 struct kvm_device {
 	struct kvm_device_ops *ops;
 	struct kvm *kvm;
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc95..b606bb689a3e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
 #ifndef __KVM_TYPES_H__
 #define __KVM_TYPES_H__
 
+struct kvm;
+struct kvm_async_pf;
+struct kvm_device_ops;
+struct kvm_interrupt;
+struct kvm_irq_routing_table;
+struct kvm_memory_slot;
+struct kvm_one_reg;
+struct kvm_run;
+struct kvm_userspace_memory_region;
+struct kvm_vcpu;
+struct kvm_vcpu_init;
+
+enum kvm_mr_change;
+
 #include <asm/types.h>
 
 /*

From 3dcac226202ec154b5c29bcd968e7b894e61c031 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 28 Aug 2014 15:13:02 +0200
Subject: [PATCH 252/287] KVM: static inline empty kvm_arch functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using static inline is going to save few bytes and cycles.
For example on powerpc, the difference is 700 B after stripping.
(5 kB before)

This patch also deals with two overlooked empty functions:
kvm_arch_flush_shadow was not removed from arch/mips/kvm/mips.c
  2df72e9bc KVM: split kvm_arch_flush_shadow
and kvm_arch_sched_in never made it into arch/ia64/kvm/kvm-ia64.c.
  e790d9ef6 KVM: add kvm_arch_sched_in

Signed-off-by: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0865e636aef751966e6e0f8950a26bc7391e923c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h     |  6 ++++
 arch/arm/kvm/arm.c                  | 19 -------------
 arch/arm64/include/asm/kvm_host.h   |  6 ++++
 arch/ia64/include/asm/kvm_host.h    | 12 ++++++++
 arch/ia64/kvm/kvm-ia64.c            | 30 --------------------
 arch/mips/include/asm/kvm_host.h    | 11 ++++++++
 arch/powerpc/include/asm/kvm_host.h |  8 ++++++
 arch/powerpc/kvm/powerpc.c          | 28 -------------------
 arch/s390/include/asm/kvm_host.h    | 14 ++++++++++
 arch/s390/kvm/kvm-s390.c            | 43 -----------------------------
 10 files changed, 57 insertions(+), 120 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 0bc5295bbfdf..2b17f6ab9642 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,4 +230,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 79268a12a49e..882f7e856b6b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 void kvm_arch_check_processor_compat(void *rtn)
 {
 	*(int *)rtn = 0;
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
 
 /**
  * kvm_arch_init_vm - initializes a VM data structure
@@ -285,14 +274,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = cpu;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ac99093d004e..eaf689c48a59 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -242,4 +242,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 	}
 }
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 65088aa016c0..cf03097176b1 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -596,6 +596,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
 struct kvm *kvm_arch_alloc_vm(void);
 void kvm_arch_free_vm(struct kvm *kvm);
 
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		const struct kvm_memory_slot *old,
+		enum kvm_mr_change change) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+
 #endif /* __ASSEMBLY__*/
 
 #endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index b48aa69e35e1..b1e074d299ea 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1363,10 +1363,6 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 	}
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
@@ -1375,10 +1371,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_release_vm_pages(kvm);
 }
 
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	if (cpu != vcpu->cpu) {
@@ -1467,7 +1459,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kfree(vcpu->arch.apic);
 }
 
-
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -1550,21 +1541,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
 	return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		struct kvm_memory_slot *memslot,
 		struct kvm_userspace_memory_region *mem,
@@ -1596,14 +1578,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	return 0;
 }
 
-void kvm_arch_commit_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
-		const struct kvm_memory_slot *old,
-		enum kvm_mr_change change)
-{
-	return;
-}
-
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
@@ -1852,10 +1826,6 @@ int kvm_arch_hardware_setup(void)
 	return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 {
 	return __apic_accept_irq(vcpu, irq->vector);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index d56ee50d6885..279376e90c7f 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -654,5 +654,16 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size);
 extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0a3238026fd7..90a9c0e4952c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -623,4 +623,12 @@ struct kvm_vcpu_arch {
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
 
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_exit(void) {}
+
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e4c719d49e16..7a75d6dbf610 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -251,19 +251,11 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
 }
 
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
 void kvm_arch_check_processor_compat(void *rtn)
 {
 	*(int *)rtn = kvmppc_core_check_processor_compat();
@@ -296,10 +288,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	mutex_unlock(&kvm->lock);
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
 	int r;
@@ -421,10 +409,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 	return kvmppc_core_create_memslot(kvm, slot, npages);
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
 				   struct kvm_userspace_memory_region *mem,
@@ -441,10 +425,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	kvmppc_core_commit_memory_region(kvm, mem, old);
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
@@ -533,10 +513,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 #ifdef CONFIG_BOOKE
@@ -1134,7 +1110,3 @@ int kvm_arch_init(void *opaque)
 {
 	return 0;
 }
-
-void kvm_arch_exit(void)
-{
-}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 42913475fc6c..d45e11dac5cf 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -269,4 +269,18 @@ struct kvm_arch{
 };
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+
+static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+		struct kvm_memory_slot *slot) {}
+
 #endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 46392afd043d..1fe3bc10c87f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -92,10 +92,6 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
 int kvm_arch_hardware_setup(void)
 {
 	return 0;
@@ -105,19 +101,11 @@ void kvm_arch_hardware_unsetup(void)
 {
 }
 
-void kvm_arch_check_processor_compat(void *rtn)
-{
-}
-
 int kvm_arch_init(void *opaque)
 {
 	return 0;
 }
 
-void kvm_arch_exit(void)
-{
-}
-
 /* Section: device related */
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
@@ -289,10 +277,6 @@ static void kvm_free_vcpus(struct kvm *kvm)
 	mutex_unlock(&kvm->lock);
 }
 
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_free_vcpus(kvm);
@@ -320,15 +304,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-	/* Nothing todo */
-}
-
-void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
-{
-}
-
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_regs(&vcpu->arch.host_fpregs);
@@ -975,21 +950,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
-			   struct kvm_memory_slot *dont)
-{
-}
-
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
 	return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
@@ -1035,15 +1001,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	return;
 }
 
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-}
-
 static int __init kvm_s390_init(void)
 {
 	int ret;

From df6fef6a404701ff6249330215c31f2a2b041d7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 28 Aug 2014 15:13:03 +0200
Subject: [PATCH 253/287] KVM: remove garbage arg to *hardware_{en,dis}able
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the beggining was on_each_cpu(), which required an unused argument to
kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten.

Remove unnecessary arguments that stem from this.

Signed-off-by: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 13a34e067eab24fec882e1834fbf2cc31911d474)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h     |  2 +-
 arch/arm/kvm/arm.c                  |  2 +-
 arch/arm64/include/asm/kvm_host.h   |  2 +-
 arch/ia64/kvm/kvm-ia64.c            |  4 ++--
 arch/mips/include/asm/kvm_host.h    |  2 +-
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/kvm/powerpc.c          |  2 +-
 arch/s390/include/asm/kvm_host.h    |  2 +-
 arch/s390/kvm/kvm-s390.c            |  2 +-
 arch/x86/include/asm/kvm_host.h     |  4 ++--
 arch/x86/kvm/svm.c                  |  4 ++--
 arch/x86/kvm/vmx.c                  |  4 ++--
 arch/x86/kvm/x86.c                  | 12 ++++++------
 include/linux/kvm_host.h            |  4 ++--
 virt/kvm/kvm_main.c                 |  4 ++--
 15 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 2b17f6ab9642..46e5d4da1989 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 882f7e856b6b..c8ff64b54459 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -87,7 +87,7 @@ struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
 	return &kvm_arm_running_vcpu;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index eaf689c48a59..bcde41905746 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic)
 	}
 }
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index b1e074d299ea..c9aa236dc29b 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
 
 static  DEFINE_SPINLOCK(vp_lock);
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	long  status;
 	long  tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
 
 	long status;
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 279376e90c7f..5e3f4b0f18c8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -654,7 +654,7 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size);
 extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
 extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 90a9c0e4952c..f391f3fbde8b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -623,7 +623,7 @@ struct kvm_vcpu_arch {
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7a75d6dbf610..ea4cfdc991da 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -246,7 +246,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	return r;
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	return 0;
 }
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d45e11dac5cf..99971dfc6b9a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -270,7 +270,7 @@ struct kvm_arch{
 
 extern int sie64a(struct kvm_s390_sie_block *, u64 *);
 
-static inline void kvm_arch_hardware_disable(void *garbage) {}
+static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_check_processor_compat(void *rtn) {}
 static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1fe3bc10c87f..412fbc5dc688 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -86,7 +86,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 static unsigned long long *facilities;
 
 /* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	/* every s390 is virtualization enabled ;-) */
 	return 0;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bd6f3529453d..5137dca9e9d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -634,8 +634,8 @@ struct msr_data {
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
-	int (*hardware_enable)(void *dummy);
-	void (*hardware_disable)(void *dummy);
+	int (*hardware_enable)(void);
+	void (*hardware_disable)(void);
 	void (*check_processor_compatibility)(void *rtn);
 	int (*hardware_setup)(void);               /* __init */
 	void (*hardware_unsetup)(void);            /* __exit */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a14a6eaf871d..934befea3e36 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -606,7 +606,7 @@ static int has_svm(void)
 	return 1;
 }
 
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
 {
 	/* Make sure we clean up behind us */
 	if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -617,7 +617,7 @@ static void svm_hardware_disable(void *garbage)
 	amd_pmu_disable_virt();
 }
 
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
 {
 
 	struct svm_cpu_data *sd;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5402c94ab768..af423252c265 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2566,7 +2566,7 @@ static void kvm_cpu_vmxon(u64 addr)
 			: "memory", "cc");
 }
 
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2630,7 +2630,7 @@ static void kvm_cpu_vmxoff(void)
 	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
 }
 
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
 {
 	if (vmm_exclusive) {
 		vmclear_local_loaded_vmcss();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a348daad61fd..37d9503b81ea 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -239,7 +239,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
 
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -6603,7 +6603,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
 	kvm_rip_write(vcpu, 0);
 }
 
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
 {
 	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
@@ -6614,7 +6614,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	bool stable, backwards_tsc = false;
 
 	kvm_shared_msr_cpu_online();
-	ret = kvm_x86_ops->hardware_enable(garbage);
+	ret = kvm_x86_ops->hardware_enable();
 	if (ret != 0)
 		return ret;
 
@@ -6694,10 +6694,10 @@ int kvm_arch_hardware_enable(void *garbage)
 	return 0;
 }
 
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
 {
-	kvm_x86_ops->hardware_disable(garbage);
-	drop_user_return_notifiers(garbage);
+	kvm_x86_ops->hardware_disable();
+	drop_user_return_notifiers();
 }
 
 int kvm_arch_hardware_setup(void)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b8cb3c2d893e..ce4cf2f1c419 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -608,8 +608,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
-int kvm_arch_hardware_enable(void *garbage);
-void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
 int kvm_arch_hardware_setup(void);
 void kvm_arch_hardware_unsetup(void);
 void kvm_arch_check_processor_compat(void *rtn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0cb02c749622..556049f120b7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk)
 
 	cpumask_set_cpu(cpu, cpus_hardware_enabled);
 
-	r = kvm_arch_hardware_enable(NULL);
+	r = kvm_arch_hardware_enable();
 
 	if (r) {
 		cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk)
 	if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
 		return;
 	cpumask_clear_cpu(cpu, cpus_hardware_enabled);
-	kvm_arch_hardware_disable(NULL);
+	kvm_arch_hardware_disable();
 }
 
 static void hardware_disable(void)

From 25523c743ec5bf8ebe707ac8a8112eac0a2d42b6 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:31 +0200
Subject: [PATCH 254/287] KVM: remove redundant check of in_spin_loop

The expression `vcpu->spin_loop.in_spin_loop' is always true,
because it is evaluated only when the condition
`!vcpu->spin_loop.in_spin_loop' is false.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 34656113182b704682e23d1363417536addfec97)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 556049f120b7..025f8300b6f0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1774,8 +1774,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 	bool eligible;
 
 	eligible = !vcpu->spin_loop.in_spin_loop ||
-			(vcpu->spin_loop.in_spin_loop &&
-			 vcpu->spin_loop.dy_eligible);
+		    vcpu->spin_loop.dy_eligible;
 
 	if (vcpu->spin_loop.in_spin_loop)
 		kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);

From c4e4c7bf26b78b6cb8fa985c19854c8f6e9abc20 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:32 +0200
Subject: [PATCH 255/287] KVM: remove redundant assigment of return value in
 kvm_dev_ioctl

The first statement of kvm_dev_ioctl is
        long r = -EINVAL;

No need to reassign the same value.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit a13f533b2f1d53a7c0baa7490498caeab7bc8ba5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 025f8300b6f0..d201466921bf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2611,7 +2611,6 @@ static long kvm_dev_ioctl(struct file *filp,
 
 	switch (ioctl) {
 	case KVM_GET_API_VERSION:
-		r = -EINVAL;
 		if (arg)
 			goto out;
 		r = KVM_API_VERSION;
@@ -2623,7 +2622,6 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
 		break;
 	case KVM_GET_VCPU_MMAP_SIZE:
-		r = -EINVAL;
 		if (arg)
 			goto out;
 		r = PAGE_SIZE;     /* struct kvm_run */

From e665e3aae1e8c2e47e467c13c10e5b92c4be61a5 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 4 Sep 2014 21:13:33 +0200
Subject: [PATCH 256/287] KVM: remove redundant assignments in
 __kvm_set_memory_region

__kvm_set_memory_region sets r to EINVAL very early.
Doing it again is not necessary. The same is true later on, where
r is assigned -ENOMEM twice.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit f2a25160887e00434ce1361007009120e1fecbda)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d201466921bf..3ed27192a849 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -778,7 +778,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
-	r = -EINVAL;
 	if (npages > KVM_MEM_MAX_NR_PAGES)
 		goto out;
 
@@ -792,7 +791,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	new.npages = npages;
 	new.flags = mem->flags;
 
-	r = -EINVAL;
 	if (npages) {
 		if (!old.npages)
 			change = KVM_MR_CREATE;
@@ -848,7 +846,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		r = -ENOMEM;
 		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
 				GFP_KERNEL);
 		if (!slots)

From cf0dfca545678c6dc257a75094f13515e0bf6c69 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 9 Sep 2014 11:27:09 +0100
Subject: [PATCH 257/287] ARM/arm64: KVM: fix use of WnR bit in
 kvm_is_write_fault()

The ISS encoding for an exception from a Data Abort has a WnR
bit[6] that indicates whether the Data Abort was caused by a
read or a write instruction. While there are several fields
in the encoding that are only valid if the ISV bit[24] is set,
WnR is not one of them, so we can read it unconditionally.

Instead of fixing both implementations of kvm_is_write_fault()
in place, reimplement it just once using kvm_vcpu_dabt_iswrite(),
which already does the right thing with respect to the WnR bit.
Also fix up the callers to pass 'vcpu'

Acked-by: Laszlo Ersek <lersek@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit a7d079cea2dffb112e26da2566dd84c0ef1fce97)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   | 11 -----------
 arch/arm/kvm/mmu.c               | 12 ++++++++++--
 arch/arm64/include/asm/kvm_mmu.h | 13 -------------
 3 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 5cc0b0f5f72f..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
 	flush_pmd_entry(pte);
 }
 
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
-	unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
-	if (hsr_ec == HSR_EC_IABT)
-		return false;
-	else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
-		return false;
-	else
-		return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd)
 {
 	clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 62f5642153f9..bb06f76a8f89 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -746,6 +746,14 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
 	return false;
 }
 
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_trap_is_iabt(vcpu))
+		return false;
+
+	return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -760,7 +768,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
 
-	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
 		kvm_err("Unexpected L2 read permission error\n");
 		return -EFAULT;
@@ -886,7 +894,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	gfn = fault_ipa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
 	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
-	write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+	write_fault = kvm_is_write_fault(vcpu);
 	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
 		if (is_iabt) {
 			/* Prefetch Abort on I/O address */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 8e138c7c53ac..737da742b293 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -93,19 +93,6 @@ void kvm_clear_hyp_idmap(void);
 #define	kvm_set_pte(ptep, pte)		set_pte(ptep, pte)
 #define	kvm_set_pmd(pmdp, pmd)		set_pmd(pmdp, pmd)
 
-static inline bool kvm_is_write_fault(unsigned long esr)
-{
-	unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT;
-
-	if (esr_ec == ESR_EL2_EC_IABT)
-		return false;
-
-	if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR))
-		return false;
-
-	return true;
-}
-
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}

From cdead48c82db0e501edfce2c85b7834de2960d6f Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Mon, 1 Sep 2014 09:36:08 +0100
Subject: [PATCH 258/287] KVM: EVENTFD: remove inclusion of irq.h

No more needed. irq.h would be void on ARM.

Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 0ba09511ddc3ff0b462f37b4fe4b9c4dccc054ec)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/eventfd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 67563284f7b9..71ed39941b9c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,7 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#include "irq.h"
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD

From 86318858caaa5a5e5b58f7bd2624c2a953eddf76 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:33 +0100
Subject: [PATCH 259/287] KVM: device: add simple registration mechanism for
 kvm_device_ops

kvm_ioctl_create_device currently has knowledge of all the device types
and their associated ops. This is fairly inflexible when adding support
for new in-kernel device emulations, so move what we currently have out
into a table, which can support dynamic registration of ops by new
drivers for virtual hardware.

Cc: Alex Williamson <Alex.Williamson@redhat.com>
Cc: Alex Graf <agraf@suse.de>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d60eacb07053142bfb9b41582074a89a790a9d46)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  1 +
 include/uapi/linux/kvm.h | 21 ++++++++++----
 virt/kvm/kvm_main.c      | 60 ++++++++++++++++++++++++----------------
 3 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce4cf2f1c419..fc7b4270bc4a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1022,6 +1022,7 @@ struct kvm_device_ops {
 void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index bbc1f8a09eb8..00d2c69a3cb6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -848,14 +848,25 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
-#define KVM_DEV_TYPE_FSL_MPIC_20	1
-#define KVM_DEV_TYPE_FSL_MPIC_42	2
-#define KVM_DEV_TYPE_XICS		3
-#define KVM_DEV_TYPE_VFIO		4
 #define  KVM_DEV_VFIO_GROUP			1
 #define   KVM_DEV_VFIO_GROUP_ADD			1
 #define   KVM_DEV_VFIO_GROUP_DEL			2
-#define KVM_DEV_TYPE_ARM_VGIC_V2	5
+
+enum kvm_device_type {
+	KVM_DEV_TYPE_FSL_MPIC_20	= 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20	KVM_DEV_TYPE_FSL_MPIC_20
+	KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42	KVM_DEV_TYPE_FSL_MPIC_42
+	KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS		KVM_DEV_TYPE_XICS
+	KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO		KVM_DEV_TYPE_VFIO
+	KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2	KVM_DEV_TYPE_ARM_VGIC_V2
+	KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC		KVM_DEV_TYPE_FLIC
+	KVM_DEV_TYPE_MAX,
+};
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3ed27192a849..df006d12c14b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2261,6 +2261,37 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
 	return filp->private_data;
 }
 
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+#ifdef CONFIG_KVM_MPIC
+	[KVM_DEV_TYPE_FSL_MPIC_20]	= &kvm_mpic_ops,
+	[KVM_DEV_TYPE_FSL_MPIC_42]	= &kvm_mpic_ops,
+#endif
+
+#ifdef CONFIG_KVM_XICS
+	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
+#endif
+
+#ifdef CONFIG_KVM_VFIO
+	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
+#endif
+
+#ifdef CONFIG_KVM_ARM_VGIC
+	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
+#endif
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+	if (type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENOSPC;
+
+	if (kvm_device_ops_table[type] != NULL)
+		return -EEXIST;
+
+	kvm_device_ops_table[type] = ops;
+	return 0;
+}
+
 static int kvm_ioctl_create_device(struct kvm *kvm,
 				   struct kvm_create_device *cd)
 {
@@ -2269,31 +2300,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
 	int ret;
 
-	switch (cd->type) {
-#ifdef CONFIG_KVM_MPIC
-	case KVM_DEV_TYPE_FSL_MPIC_20:
-	case KVM_DEV_TYPE_FSL_MPIC_42:
-		ops = &kvm_mpic_ops;
-		break;
-#endif
-#ifdef CONFIG_KVM_XICS
-	case KVM_DEV_TYPE_XICS:
-		ops = &kvm_xics_ops;
-		break;
-#endif
-#ifdef CONFIG_KVM_VFIO
-	case KVM_DEV_TYPE_VFIO:
-		ops = &kvm_vfio_ops;
-		break;
-#endif
-#ifdef CONFIG_KVM_ARM_VGIC
-	case KVM_DEV_TYPE_ARM_VGIC_V2:
-		ops = &kvm_arm_vgic_v2_ops;
-		break;
-#endif
-	default:
+	if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+		return -ENODEV;
+
+	ops = kvm_device_ops_table[cd->type];
+	if (ops == NULL)
 		return -ENODEV;
-	}
 
 	if (test)
 		return 0;

From 9e95eca3b761209d08e97144de3962f355ee2078 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:34 +0100
Subject: [PATCH 260/287] KVM: ARM: vgic: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the ARM VGIC, instead of relying on the static table.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit c06a841bf36340e9e917ce60d11a6425ac85d0bd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |   1 -
 virt/kvm/arm/vgic.c      | 157 ++++++++++++++++++++-------------------
 virt/kvm/kvm_main.c      |   4 -
 3 files changed, 79 insertions(+), 83 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index fc7b4270bc4a..ed20bc0d209b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1027,7 +1027,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
 extern struct kvm_device_ops kvm_vfio_ops;
-extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index efe6eee2e7eb..8c95b2468cd8 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1522,83 +1522,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void vgic_init_maintenance_interrupt(void *info)
-{
-	enable_percpu_irq(vgic->maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
-			   unsigned long action, void *cpu)
-{
-	switch (action) {
-	case CPU_STARTING:
-	case CPU_STARTING_FROZEN:
-		vgic_init_maintenance_interrupt(NULL);
-		break;
-	case CPU_DYING:
-	case CPU_DYING_FROZEN:
-		disable_percpu_irq(vgic->maint_irq);
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
-	.notifier_call = vgic_cpu_notify,
-};
-
-static const struct of_device_id vgic_ids[] = {
-	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
-	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
-	{},
-};
-
-int kvm_vgic_hyp_init(void)
-{
-	const struct of_device_id *matched_id;
-	const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
-				const struct vgic_params **);
-	struct device_node *vgic_node;
-	int ret;
-
-	vgic_node = of_find_matching_node_and_match(NULL,
-						    vgic_ids, &matched_id);
-	if (!vgic_node) {
-		kvm_err("error: no compatible GIC node found\n");
-		return -ENODEV;
-	}
-
-	vgic_probe = matched_id->data;
-	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
-	if (ret)
-		return ret;
-
-	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
-				 "vgic", kvm_get_running_vcpus());
-	if (ret) {
-		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
-		return ret;
-	}
-
-	ret = __register_cpu_notifier(&vgic_cpu_nb);
-	if (ret) {
-		kvm_err("Cannot register vgic CPU notifier\n");
-		goto out_free_irq;
-	}
-
-	/* Callback into for arch code for setup */
-	vgic_arch_setup(vgic);
-
-	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
-	return 0;
-
-out_free_irq:
-	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
-	return ret;
-}
-
 /**
  * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
@@ -2062,7 +1985,7 @@ static int vgic_create(struct kvm_device *dev, u32 type)
 	return kvm_vgic_create(dev->kvm);
 }
 
-struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.name = "kvm-arm-vgic",
 	.create = vgic_create,
 	.destroy = vgic_destroy,
@@ -2070,3 +1993,81 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = {
 	.get_attr = vgic_get_attr,
 	.has_attr = vgic_has_attr,
 };
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+	enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+			   unsigned long action, void *cpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		vgic_init_maintenance_interrupt(NULL);
+		break;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		disable_percpu_irq(vgic->maint_irq);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+	.notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+	{ .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+	{ .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+	{},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+	const struct of_device_id *matched_id;
+	int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
+			  const struct vgic_params **);
+	struct device_node *vgic_node;
+	int ret;
+
+	vgic_node = of_find_matching_node_and_match(NULL,
+						    vgic_ids, &matched_id);
+	if (!vgic_node) {
+		kvm_err("error: no compatible GIC node found\n");
+		return -ENODEV;
+	}
+
+	vgic_probe = matched_id->data;
+	ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+	if (ret)
+		return ret;
+
+	ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+				 "vgic", kvm_get_running_vcpus());
+	if (ret) {
+		kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+		return ret;
+	}
+
+	ret = __register_cpu_notifier(&vgic_cpu_nb);
+	if (ret) {
+		kvm_err("Cannot register vgic CPU notifier\n");
+		goto out_free_irq;
+	}
+
+	/* Callback into for arch code for setup */
+	vgic_arch_setup(vgic);
+
+	on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+	return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+				       KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+	free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+	return ret;
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index df006d12c14b..111d560418f3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2274,10 +2274,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_VFIO
 	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
 #endif
-
-#ifdef CONFIG_KVM_ARM_VGIC
-	[KVM_DEV_TYPE_ARM_VGIC_V2]	= &kvm_arm_vgic_v2_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)

From 7bd2f48101c8f43cd36e9920c7315df8575b61a8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Sep 2014 10:27:36 +0100
Subject: [PATCH 261/287] KVM: VFIO: register kvm_device_ops dynamically

Now that we have a dynamic means to register kvm_device_ops, use that
for the VFIO kvm device, instead of relying on the static table.

This is achieved by a module_init call to register the ops with KVM.

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Alex Williamson <Alex.Williamson@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 80ce1639727e9d38729c34f162378508c307ca25)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/kvm_host.h |  1 -
 virt/kvm/kvm_main.c      |  4 ----
 virt/kvm/vfio.c          | 22 +++++++++++++++-------
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ed20bc0d209b..f64e941a4213 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1026,7 +1026,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
 
 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
-extern struct kvm_device_ops kvm_vfio_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 111d560418f3..f019669674a5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2270,10 +2270,6 @@ static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
 #ifdef CONFIG_KVM_XICS
 	[KVM_DEV_TYPE_XICS]		= &kvm_xics_ops,
 #endif
-
-#ifdef CONFIG_KVM_VFIO
-	[KVM_DEV_TYPE_VFIO]		= &kvm_vfio_ops,
-#endif
 };
 
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 597c258245ea..475487e238e1 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -189,6 +189,16 @@ static void kvm_vfio_destroy(struct kvm_device *dev)
 	kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
 }
 
+static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+
+static struct kvm_device_ops kvm_vfio_ops = {
+	.name = "kvm-vfio",
+	.create = kvm_vfio_create,
+	.destroy = kvm_vfio_destroy,
+	.set_attr = kvm_vfio_set_attr,
+	.has_attr = kvm_vfio_has_attr,
+};
+
 static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 {
 	struct kvm_device *tmp;
@@ -211,10 +221,8 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
 	return 0;
 }
 
-struct kvm_device_ops kvm_vfio_ops = {
-	.name = "kvm-vfio",
-	.create = kvm_vfio_create,
-	.destroy = kvm_vfio_destroy,
-	.set_attr = kvm_vfio_set_attr,
-	.has_attr = kvm_vfio_has_attr,
-};
+static int __init kvm_vfio_ops_init(void)
+{
+	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
+}
+module_init(kvm_vfio_ops_init);

From fa0603dfed451534fda7856d1301a6f81ae5de82 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 9 Jun 2014 12:27:18 +0200
Subject: [PATCH 262/287] arm/arm64: KVM: Rename irq_state to irq_pending

The irq_state field on the distributor struct is ambiguous in its
meaning; the comment says it's the level of the input put, but that
doesn't make much sense for edge-triggered interrupts.  The code
actually uses this state variable to check if the interrupt is in the
pending state on the distributor so clarify the comment and rename the
actual variable and accessor methods.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 227844f53864077ccaefe01d0960fcccc03445ce)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++--
 virt/kvm/arm/vgic.c    | 52 +++++++++++++++++++++---------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c121bb65..388d442eecb5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -140,8 +140,8 @@ struct vgic_dist {
 	/* Interrupt enabled (one bit per IRQ) */
 	struct vgic_bitmap	irq_enabled;
 
-	/* Interrupt 'pin' level */
-	struct vgic_bitmap	irq_state;
+	/* Interrupt state is pending on the distributor */
+	struct vgic_bitmap	irq_pending;
 
 	/* Level-triggered interrupt in progress */
 	struct vgic_bitmap	irq_active;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8c95b2468cd8..3d04a6de6f23 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -37,7 +37,7 @@
  *
  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
  *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
+ * - VGIC pending interrupts are stored on the vgic.irq_pending vgic
  *   bitmap (this bitmap is updated by both user land ioctls and guest
  *   mmio ops, and other in-kernel peripherals such as the
  *   arch. timers) and indicate the 'wire' state.
@@ -45,8 +45,8 @@
  *   recalculated
  * - To calculate the oracle, we need info for each cpu from
  *   compute_pending_for_cpu, which considers:
- *   - PPI: dist->irq_state & dist->irq_enable
- *   - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
+ *   - PPI: dist->irq_pending & dist->irq_enable
+ *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
  *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
  *     registers, stored on each vcpu. We only keep one bit of
  *     information per interrupt, making sure that only one vcpu can
@@ -221,21 +221,21 @@ static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
+	return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
 }
 
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
+	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
 }
 
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
+	vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
 }
 
 static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
@@ -409,7 +409,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
 				       vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
@@ -425,7 +425,7 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  struct kvm_exit_mmio *mmio,
 					  phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
+	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
 				       vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
@@ -651,7 +651,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * is fine, then we are only setting a few bits that were
 		 * already set.
 		 */
-		vgic_dist_irq_set(vcpu, lr.irq);
+		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		if (lr.irq < VGIC_NR_SGIS)
 			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
 		lr.state &= ~LR_STATE_PENDING;
@@ -932,7 +932,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		if (target_cpus & 1) {
 			/* Flag the SGI as pending */
-			vgic_dist_irq_set(vcpu, sgi);
+			vgic_dist_irq_set_pending(vcpu, sgi);
 			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
 			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
 		}
@@ -952,11 +952,11 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 	pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
 	pend_shared = vcpu->arch.vgic_cpu.pending_shared;
 
-	pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
+	pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
 	enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
 	bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
 
-	pending = vgic_bitmap_get_shared_map(&dist->irq_state);
+	pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
 	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
 	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
 	bitmap_and(pend_shared, pend_shared,
@@ -1160,7 +1160,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 	 * our emulated gic and can get rid of them.
 	 */
 	if (!sources) {
-		vgic_dist_irq_clear(vcpu, irq);
+		vgic_dist_irq_clear_pending(vcpu, irq);
 		vgic_cpu_irq_clear(vcpu, irq);
 		return true;
 	}
@@ -1175,7 +1175,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 
 	if (vgic_queue_irq(vcpu, 0, irq)) {
 		if (vgic_irq_is_edge(vcpu, irq)) {
-			vgic_dist_irq_clear(vcpu, irq);
+			vgic_dist_irq_clear_pending(vcpu, irq);
 			vgic_cpu_irq_clear(vcpu, irq);
 		} else {
 			vgic_irq_set_active(vcpu, irq);
@@ -1376,7 +1376,7 @@ static void vgic_kick_vcpus(struct kvm *kvm)
 
 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 {
-	int is_edge = vgic_irq_is_edge(vcpu, irq);
+	int edge_triggered = vgic_irq_is_edge(vcpu, irq);
 	int state = vgic_dist_irq_is_pending(vcpu, irq);
 
 	/*
@@ -1384,26 +1384,26 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 	 * - edge triggered and we have a rising edge
 	 * - level triggered and we change level
 	 */
-	if (is_edge)
+	if (edge_triggered)
 		return level > state;
 	else
 		return level != state;
 }
 
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 				  unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
-	int is_edge, is_level;
+	int edge_triggered, level_triggered;
 	int enabled;
 	bool ret = true;
 
 	spin_lock(&dist->lock);
 
 	vcpu = kvm_get_vcpu(kvm, cpuid);
-	is_edge = vgic_irq_is_edge(vcpu, irq_num);
-	is_level = !is_edge;
+	edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+	level_triggered = !edge_triggered;
 
 	if (!vgic_validate_injection(vcpu, irq_num, level)) {
 		ret = false;
@@ -1418,9 +1418,9 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
 
 	if (level)
-		vgic_dist_irq_set(vcpu, irq_num);
+		vgic_dist_irq_set_pending(vcpu, irq_num);
 	else
-		vgic_dist_irq_clear(vcpu, irq_num);
+		vgic_dist_irq_clear_pending(vcpu, irq_num);
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 
@@ -1429,7 +1429,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 		goto out;
 	}
 
-	if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
+	if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) {
 		/*
 		 * Level interrupt in progress, will be picked up
 		 * when EOId.
@@ -1466,7 +1466,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
+	if (vgic_update_irq_pending(kvm, cpuid, irq_num, level))
 		vgic_kick_vcpus(kvm);
 
 	return 0;

From 25b3fb8068b572c2b9f549b17f513092309aa610 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 9 Jun 2014 12:55:13 +0200
Subject: [PATCH 263/287] arm/arm64: KVM: Rename irq_active to irq_queued

We have a special bitmap on the distributor struct to keep track of when
level-triggered interrupts are queued on the list registers.  This was
named irq_active, which is confusing, because the active state of an
interrupt as per the GIC spec is a different thing, not specifically
related to edge-triggered/level-triggered configurations but rather
indicates an interrupt which has been ack'ed but not yet eoi'ed.

Rename the bitmap and the corresponding accessor functions to irq_queued
to clarify what this is actually used for.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit dbf20f9d8105cca531614c8bff9a74351e8e67e7)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  4 ++--
 virt/kvm/arm/vgic.c    | 33 +++++++++++++++++++--------------
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 388d442eecb5..7d8e61fa9928 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -143,8 +143,8 @@ struct vgic_dist {
 	/* Interrupt state is pending on the distributor */
 	struct vgic_bitmap	irq_pending;
 
-	/* Level-triggered interrupt in progress */
-	struct vgic_bitmap	irq_active;
+	/* Level-triggered interrupt queued on VCPU interface */
+	struct vgic_bitmap	irq_queued;
 
 	/* Interrupt priority. Not used yet. */
 	struct vgic_bytemap	irq_priority;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 3d04a6de6f23..769cc7177f10 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -60,12 +60,12 @@
  * the 'line' again. This is achieved as such:
  *
  * - When a level interrupt is moved onto a vcpu, the corresponding
- *   bit in irq_active is set. As long as this bit is set, the line
+ *   bit in irq_queued is set. As long as this bit is set, the line
  *   will be ignored for further interrupts. The interrupt is injected
  *   into the vcpu with the GICH_LR_EOI bit set (generate a
  *   maintenance interrupt on EOI).
  * - When the interrupt is EOIed, the maintenance interrupt fires,
- *   and clears the corresponding bit in irq_active. This allow the
+ *   and clears the corresponding bit in irq_queued. This allows the
  *   interrupt line to be sampled again.
  */
 
@@ -196,25 +196,25 @@ static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
 	return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
 }
 
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+	return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
 }
 
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
 }
 
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
-	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -256,6 +256,11 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
 			  vcpu->arch.vgic_cpu.pending_shared);
 }
 
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+	return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
 static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
 {
 	return le32_to_cpu(*((u32 *)mmio->data)) & mask;
@@ -1079,8 +1084,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
 			vgic_retire_lr(lr, vlr.irq, vcpu);
-			if (vgic_irq_is_active(vcpu, vlr.irq))
-				vgic_irq_clear_active(vcpu, vlr.irq);
+			if (vgic_irq_is_queued(vcpu, vlr.irq))
+				vgic_irq_clear_queued(vcpu, vlr.irq);
 		}
 	}
 }
@@ -1170,7 +1175,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 
 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 {
-	if (vgic_irq_is_active(vcpu, irq))
+	if (!vgic_can_sample_irq(vcpu, irq))
 		return true; /* level interrupt, already queued */
 
 	if (vgic_queue_irq(vcpu, 0, irq)) {
@@ -1178,7 +1183,7 @@ static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
 			vgic_dist_irq_clear_pending(vcpu, irq);
 			vgic_cpu_irq_clear(vcpu, irq);
 		} else {
-			vgic_irq_set_active(vcpu, irq);
+			vgic_irq_set_queued(vcpu, irq);
 		}
 
 		return true;
@@ -1262,7 +1267,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-			vgic_irq_clear_active(vcpu, vlr.irq);
+			vgic_irq_clear_queued(vcpu, vlr.irq);
 			WARN_ON(vlr.state & LR_STATE_MASK);
 			vlr.state = 0;
 			vgic_set_lr(vcpu, lr, vlr);
@@ -1429,7 +1434,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 		goto out;
 	}
 
-	if (level_triggered && vgic_irq_is_active(vcpu, irq_num)) {
+	if (!vgic_can_sample_irq(vcpu, irq_num)) {
 		/*
 		 * Level interrupt in progress, will be picked up
 		 * when EOId.

From 520445af8ef7b1e8d17b692c0957b3802b90d98a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:37:33 +0200
Subject: [PATCH 264/287] arm/arm64: KVM: vgic: Clear queued flags on unqueue

If we unqueue a level-triggered interrupt completely, and the LR does
not stick around in the active state (and will therefore no longer
generate a maintenance interrupt), then we should clear the queued flag
so that the vgic can actually queue this level-triggered interrupt at a
later time and deal with its pending state then.

Note: This should actually be properly fixed to handle the active state
on the distributor.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit cced50c9280ef7ca1af48080707a170efa1adfa0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 769cc7177f10..c7d068976069 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -667,8 +667,10 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * active), then the LR does not hold any useful info and can
 		 * be marked as free for other use.
 		 */
-		if (!(lr.state & LR_STATE_MASK))
+		if (!(lr.state & LR_STATE_MASK)) {
 			vgic_retire_lr(i, lr.irq, vcpu);
+			vgic_irq_clear_queued(vcpu, lr.irq);
+		}
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);

From fe8a7fe10d123135d9f8fee2eb0b0c14b31d609d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 21:54:51 +0200
Subject: [PATCH 265/287] arm/arm64: KVM: vgic: Improve handling of
 GICD_I{CS}PENDRn

Writes to GICD_ISPENDRn and GICD_ICPENDRn are currently not handled
correctly for level-triggered interrupts.  The spec states that for
level-triggered interrupts, writes to the GICD_ISPENDRn activate the
output of a flip-flop which is in turn or'ed with the actual input
interrupt signal.  Correspondingly, writes to GICD_ICPENDRn simply
deactivates the output of that flip-flop, but does not (of course) affect
the external input signal.  Reads from GICC_IAR will also deactivate the
flip-flop output.

This requires us to track the state of the level-input separately from
the state in the flip-flop.  We therefore introduce two new variables on
the distributor struct to track these two states.  Astute readers may
notice that this is introducing more state than required (because an OR
of the two states gives you the pending state), but the remaining vgic
code uses the pending bitmap for optimized operations to figure out, at
the end of the day, if an interrupt is pending or not on the distributor
side.  Refactoring the code to consider the two state variables all the
places where we currently access the precomputed pending value, did not
look pretty.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit faa1b46c3e9f4d40359aee04ff275eea5f4cae3a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  16 +++++-
 virt/kvm/arm/vgic.c    | 119 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 123 insertions(+), 12 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7d8e61fa9928..f074539c6ac5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -140,9 +140,23 @@ struct vgic_dist {
 	/* Interrupt enabled (one bit per IRQ) */
 	struct vgic_bitmap	irq_enabled;
 
-	/* Interrupt state is pending on the distributor */
+	/* Level-triggered interrupt external input is asserted */
+	struct vgic_bitmap	irq_level;
+
+	/*
+	 * Interrupt state is pending on the distributor
+	 */
 	struct vgic_bitmap	irq_pending;
 
+	/*
+	 * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
+	 * interrupts.  Essentially holds the state of the flip-flop in
+	 * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
+	 * Once set, it is only cleared for level-triggered interrupts on
+	 * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
+	 */
+	struct vgic_bitmap	irq_soft_pend;
+
 	/* Level-triggered interrupt queued on VCPU interface */
 	struct vgic_bitmap	irq_queued;
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index c7d068976069..07e97de1851b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -67,6 +67,11 @@
  * - When the interrupt is EOIed, the maintenance interrupt fires,
  *   and clears the corresponding bit in irq_queued. This allows the
  *   interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ *   writes to GICD_ISPENDRn and lowering the external input line does not
+ *   cause the interrupt to become inactive in such a situation.
+ *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ *   inactive as long as the external input line is held high.
  */
 
 #define VGIC_ADDR_UNDEF		(-1)
@@ -217,6 +222,41 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
 	vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+}
+
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -414,11 +454,26 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
-				       vcpu->vcpu_id, offset);
+	u32 *reg;
+	u32 level_mask;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+	level_mask = (~(*reg));
+
+	/* Mark both level and edge triggered irqs as pending */
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
 	if (mmio->is_write) {
+		/* Set the soft-pending flag only for level-triggered irqs */
+		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+					  vcpu->vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+		*reg &= level_mask;
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -430,11 +485,27 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  struct kvm_exit_mmio *mmio,
 					  phys_addr_t offset)
 {
-	u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_pending,
-				       vcpu->vcpu_id, offset);
+	u32 *level_active;
+	u32 *reg;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 	if (mmio->is_write) {
+		/* Re-set level triggered level-active interrupts */
+		level_active = vgic_bitmap_get_reg(&dist->irq_level,
+					  vcpu->vcpu_id, offset);
+		reg = vgic_bitmap_get_reg(&dist->irq_pending,
+					  vcpu->vcpu_id, offset);
+		*reg |= *level_active;
+
+		/* Clear soft-pending flags */
+		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+					  vcpu->vcpu_id, offset);
+		vgic_reg_access(mmio, reg, offset,
+				ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -1268,17 +1339,32 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
 			vgic_irq_clear_queued(vcpu, vlr.irq);
 			WARN_ON(vlr.state & LR_STATE_MASK);
 			vlr.state = 0;
 			vgic_set_lr(vcpu, lr, vlr);
 
+			/*
+			 * If the IRQ was EOIed it was also ACKed and we we
+			 * therefore assume we can clear the soft pending
+			 * state (should it had been set) for this interrupt.
+			 *
+			 * Note: if the IRQ soft pending state was set after
+			 * the IRQ was acked, it actually shouldn't be
+			 * cleared, but we have no way of knowing that unless
+			 * we start trapping ACKs when the soft-pending state
+			 * is set.
+			 */
+			vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
 			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_is_pending(vcpu, vlr.irq)) {
+			if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
 				vgic_cpu_irq_set(vcpu, vlr.irq);
 				level_pending = true;
 			} else {
+				vgic_dist_irq_clear_pending(vcpu, vlr.irq);
 				vgic_cpu_irq_clear(vcpu, vlr.irq);
 			}
 
@@ -1384,17 +1470,19 @@ static void vgic_kick_vcpus(struct kvm *kvm)
 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
 {
 	int edge_triggered = vgic_irq_is_edge(vcpu, irq);
-	int state = vgic_dist_irq_is_pending(vcpu, irq);
 
 	/*
 	 * Only inject an interrupt if:
 	 * - edge triggered and we have a rising edge
 	 * - level triggered and we change level
 	 */
-	if (edge_triggered)
+	if (edge_triggered) {
+		int state = vgic_dist_irq_is_pending(vcpu, irq);
 		return level > state;
-	else
+	} else {
+		int state = vgic_dist_irq_get_level(vcpu, irq);
 		return level != state;
+	}
 }
 
 static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
@@ -1424,10 +1512,19 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
 
-	if (level)
+	if (level) {
+		if (level_triggered)
+			vgic_dist_irq_set_level(vcpu, irq_num);
 		vgic_dist_irq_set_pending(vcpu, irq_num);
-	else
-		vgic_dist_irq_clear_pending(vcpu, irq_num);
+	} else {
+		if (level_triggered) {
+			vgic_dist_irq_clear_level(vcpu, irq_num);
+			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+				vgic_dist_irq_clear_pending(vcpu, irq_num);
+		} else {
+			vgic_dist_irq_clear_pending(vcpu, irq_num);
+		}
+	}
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
 

From dd684bca5c33f00bdb31d2fb9819d6581489104f Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:30:45 +0200
Subject: [PATCH 266/287] arm/arm64: KVM: vgic: Fix SGI writes to
 GICD_I{CS}PENDR0

Writes to GICD_ISPENDR0 and GICD_ICPENDR0 ignore all settings of the
pending state for SGIs.  Make sure the implementation handles this
correctly.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 9da48b5502622f9f0e49df957521ec43a0c9f4c1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 07e97de1851b..4936a68d4b9b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -454,7 +454,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 					struct kvm_exit_mmio *mmio,
 					phys_addr_t offset)
 {
-	u32 *reg;
+	u32 *reg, orig;
 	u32 level_mask;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
@@ -463,6 +463,7 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 
 	/* Mark both level and edge triggered irqs as pending */
 	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	orig = *reg;
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 
@@ -474,6 +475,12 @@ static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
 				ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
 		*reg &= level_mask;
 
+		/* Ignore writes to SGIs */
+		if (offset < 2) {
+			*reg &= ~0xffff;
+			*reg |= orig & 0xffff;
+		}
+
 		vgic_update_state(vcpu->kvm);
 		return true;
 	}
@@ -486,10 +493,11 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  phys_addr_t offset)
 {
 	u32 *level_active;
-	u32 *reg;
+	u32 *reg, orig;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+	orig = *reg;
 	vgic_reg_access(mmio, reg, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 	if (mmio->is_write) {
@@ -500,6 +508,12 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
 					  vcpu->vcpu_id, offset);
 		*reg |= *level_active;
 
+		/* Ignore writes to SGIs */
+		if (offset < 2) {
+			*reg &= ~0xffff;
+			*reg |= orig & 0xffff;
+		}
+
 		/* Clear soft-pending flags */
 		reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
 					  vcpu->vcpu_id, offset);

From ce492b1937416dc0f6da42a887b5190b1ca9b4fe Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 14 Jun 2014 22:34:04 +0200
Subject: [PATCH 267/287] arm/arm64: KVM: vgic: Clarify and correct vgic
 documentation

The VGIC virtual distributor implementation documentation was written a
very long time ago, before the true nature of the beast had been
partially absorbed into my bloodstream.  Clarify the docs.

Plus, it fixes an actual bug.  ICFRn, pfff.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 7e362919a59e6fc60e08ad1cf0b047291d1ca2e9)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 4936a68d4b9b..ff88dbcacc29 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -36,21 +36,22 @@
  * How the whole thing works (courtesy of Christoffer Dall):
  *
  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- *   something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_pending vgic
- *   bitmap (this bitmap is updated by both user land ioctls and guest
- *   mmio ops, and other in-kernel peripherals such as the
- *   arch. timers) and indicate the 'wire' state.
+ *   something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored on the
+ *   vgic.irq_pending vgic bitmap (this bitmap is updated by both user land
+ *   ioctls and guest mmio ops, and other in-kernel peripherals such as the
+ *   arch. timers).
  * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
  *   recalculated
  * - To calculate the oracle, we need info for each cpu from
  *   compute_pending_for_cpu, which considers:
  *   - PPI: dist->irq_pending & dist->irq_enable
  *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
- *   - irq_spi_target is a 'formatted' version of the GICD_ICFGR
+ *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
  *     registers, stored on each vcpu. We only keep one bit of
  *     information per interrupt, making sure that only one vcpu can
  *     accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
  * - The same is true when injecting an interrupt, except that we only
  *   consider a single interrupt at a time. The irq_spi_cpu array
  *   contains the target CPU for each SPI.

From aef14f44d897ca82917d6e5d5ad847c4a9e24c40 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:00 +0100
Subject: [PATCH 268/287] KVM: ARM: vgic: plug irq injection race

As it stands, nothing prevents userspace from injecting an interrupt
before the guest's GIC is actually initialized.

This goes unnoticed so far (as everything is pretty much statically
allocated), but ends up exploding in a spectacular way once we switch
to a more dynamic allocation (the GIC data structure isn't there yet).

The fix is to test for the "ready" flag in the VGIC distributor before
trying to inject the interrupt. Note that in order to avoid breaking
userspace, we have to ignore what is essentially an error.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 71afaba4a2e98bb7bdeba5078370ab43d46e67a1)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ff88dbcacc29..5744a49d7680 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1585,7 +1585,8 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (vgic_update_irq_pending(kvm, cpuid, irq_num, level))
+	if (likely(vgic_initialized(kvm)) &&
+	    vgic_update_irq_pending(kvm, cpuid, irq_num, level))
 		vgic_kick_vcpus(kvm);
 
 	return 0;

From 38cb2f7b9a9ea6312429da05575a4bf6e697b573 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:01 +0100
Subject: [PATCH 269/287] arm/arm64: KVM: vgic: switch to dynamic allocation

So far, all the VGIC data structures are statically defined by the
*maximum* number of vcpus and interrupts it supports. It means that
we always have to oversize it to cater for the worse case.

Start by changing the data structures to be dynamically sizeable,
and allocate them at runtime.

The sizes are still very static though.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit c1bfb577addd4867a82c4f235824a315d5afb94a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c     |   3 +
 include/kvm/arm_vgic.h |  76 ++++++++++---
 virt/kvm/arm/vgic.c    | 243 +++++++++++++++++++++++++++++++++++------
 3 files changed, 269 insertions(+), 53 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c8ff64b54459..9e374158363a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -161,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 			kvm->vcpus[i] = NULL;
 		}
 	}
+
+	kvm_vgic_destroy(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -243,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_free_memory_caches(vcpu);
 	kvm_timer_vcpu_terminate(vcpu);
+	kvm_vgic_vcpu_destroy(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f074539c6ac5..fd1b8f252da1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -54,19 +54,33 @@
  * - a bunch of shared interrupts (SPI)
  */
 struct vgic_bitmap {
-	union {
-		u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
-	} percpu[VGIC_MAX_CPUS];
-	union {
-		u32 reg[VGIC_NR_SHARED_IRQS / 32];
-		DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
-	} shared;
+	/*
+	 * - One UL per VCPU for private interrupts (assumes UL is at
+	 *   least 32 bits)
+	 * - As many UL as necessary for shared interrupts.
+	 *
+	 * The private interrupts are accessed via the "private"
+	 * field, one UL per vcpu (the state for vcpu n is in
+	 * private[n]). The shared interrupts are accessed via the
+	 * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
+	 */
+	unsigned long *private;
+	unsigned long *shared;
 };
 
 struct vgic_bytemap {
-	u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
-	u32 shared[VGIC_NR_SHARED_IRQS  / 4];
+	/*
+	 * - 8 u32 per VCPU for private interrupts
+	 * - As many u32 as necessary for shared interrupts.
+	 *
+	 * The private interrupts are accessed via the "private"
+	 * field, (the state for vcpu n is in private[n*8] to
+	 * private[n*8 + 7]). The shared interrupts are accessed via
+	 * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
+	 * shared[(n-32)/4] word).
+	 */
+	u32 *private;
+	u32 *shared;
 };
 
 struct kvm_vcpu;
@@ -127,6 +141,9 @@ struct vgic_dist {
 	bool			in_kernel;
 	bool			ready;
 
+	int			nr_cpus;
+	int			nr_irqs;
+
 	/* Virtual control interface mapping */
 	void __iomem		*vctrl_base;
 
@@ -166,15 +183,36 @@ struct vgic_dist {
 	/* Level/edge triggered */
 	struct vgic_bitmap	irq_cfg;
 
-	/* Source CPU per SGI and target CPU */
-	u8			irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
+	/*
+	 * Source CPU per SGI and target CPU:
+	 *
+	 * Each byte represent a SGI observable on a VCPU, each bit of
+	 * this byte indicating if the corresponding VCPU has
+	 * generated this interrupt. This is a GICv2 feature only.
+	 *
+	 * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
+	 * the SGIs observable on VCPUn.
+	 */
+	u8			*irq_sgi_sources;
 
-	/* Target CPU for each IRQ */
-	u8			irq_spi_cpu[VGIC_NR_SHARED_IRQS];
-	struct vgic_bitmap	irq_spi_target[VGIC_MAX_CPUS];
+	/*
+	 * Target CPU for each SPI:
+	 *
+	 * Array of available SPI, each byte indicating the target
+	 * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32].
+	 */
+	u8			*irq_spi_cpu;
+
+	/*
+	 * Reverse lookup of irq_spi_cpu for faster compute pending:
+	 *
+	 * Array of bitmaps, one per VCPU, describing if IRQn is
+	 * routed to a particular VCPU.
+	 */
+	struct vgic_bitmap	*irq_spi_target;
 
 	/* Bitmap indicating which CPU has something pending */
-	unsigned long		irq_pending_on_cpu;
+	unsigned long		*irq_pending_on_cpu;
 #endif
 };
 
@@ -204,11 +242,11 @@ struct vgic_v3_cpu_if {
 struct vgic_cpu {
 #ifdef CONFIG_KVM_ARM_VGIC
 	/* per IRQ to LR mapping */
-	u8		vgic_irq_lr_map[VGIC_NR_IRQS];
+	u8		*vgic_irq_lr_map;
 
 	/* Pending interrupts on this VCPU */
 	DECLARE_BITMAP(	pending_percpu, VGIC_NR_PRIVATE_IRQS);
-	DECLARE_BITMAP(	pending_shared, VGIC_NR_SHARED_IRQS);
+	unsigned long	*pending_shared;
 
 	/* Bitmap of used/free list registers */
 	DECLARE_BITMAP(	lr_used, VGIC_V2_MAX_LRS);
@@ -239,7 +277,9 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
+void kvm_vgic_destroy(struct kvm *kvm);
 int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 5744a49d7680..102dde58c549 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -95,6 +95,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
 static void vgic_kick_vcpus(struct kvm *kvm);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
@@ -105,10 +106,8 @@ static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
 
 /*
- * struct vgic_bitmap contains unions that provide two views of
- * the same data. In one case it is an array of registers of
- * u32's, and in the other case it is a bitmap of unsigned
- * longs.
+ * struct vgic_bitmap contains a bitmap made of unsigned longs, but
+ * extracts u32s out of them.
  *
  * This does not work on 64-bit BE systems, because the bitmap access
  * will store two consecutive 32-bit words with the higher-addressed
@@ -124,23 +123,45 @@ static const struct vgic_params *vgic;
 #define REG_OFFSET_SWIZZLE	0
 #endif
 
+static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
+{
+	int nr_longs;
+
+	nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+
+	b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
+	if (!b->private)
+		return -ENOMEM;
+
+	b->shared = b->private + nr_cpus;
+
+	return 0;
+}
+
+static void vgic_free_bitmap(struct vgic_bitmap *b)
+{
+	kfree(b->private);
+	b->private = NULL;
+	b->shared = NULL;
+}
+
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
 {
 	offset >>= 2;
 	if (!offset)
-		return x->percpu[cpuid].reg + (offset ^ REG_OFFSET_SWIZZLE);
+		return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
 	else
-		return x->shared.reg + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
+		return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
 }
 
 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
 				   int cpuid, int irq)
 {
 	if (irq < VGIC_NR_PRIVATE_IRQS)
-		return test_bit(irq, x->percpu[cpuid].reg_ul);
+		return test_bit(irq, x->private + cpuid);
 
-	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
+	return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
 }
 
 static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
@@ -149,9 +170,9 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
 	unsigned long *reg;
 
 	if (irq < VGIC_NR_PRIVATE_IRQS) {
-		reg = x->percpu[cpuid].reg_ul;
+		reg = x->private + cpuid;
 	} else {
-		reg =  x->shared.reg_ul;
+		reg = x->shared;
 		irq -= VGIC_NR_PRIVATE_IRQS;
 	}
 
@@ -163,24 +184,49 @@ static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
 
 static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
 {
-	if (unlikely(cpuid >= VGIC_MAX_CPUS))
-		return NULL;
-	return x->percpu[cpuid].reg_ul;
+	return x->private + cpuid;
 }
 
 static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
 {
-	return x->shared.reg_ul;
+	return x->shared;
+}
+
+static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
+{
+	int size;
+
+	size  = nr_cpus * VGIC_NR_PRIVATE_IRQS;
+	size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+	x->private = kzalloc(size, GFP_KERNEL);
+	if (!x->private)
+		return -ENOMEM;
+
+	x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
+	return 0;
+}
+
+static void vgic_free_bytemap(struct vgic_bytemap *b)
+{
+	kfree(b->private);
+	b->private = NULL;
+	b->shared = NULL;
 }
 
 static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
 {
-	offset >>= 2;
-	BUG_ON(offset > (VGIC_NR_IRQS / 4));
-	if (offset < 8)
-		return x->percpu[cpuid] + offset;
-	else
-		return x->shared + offset - 8;
+	u32 *reg;
+
+	if (offset < VGIC_NR_PRIVATE_IRQS) {
+		reg = x->private;
+		offset += cpuid * VGIC_NR_PRIVATE_IRQS;
+	} else {
+		reg = x->shared;
+		offset -= VGIC_NR_PRIVATE_IRQS;
+	}
+
+	return reg + (offset / sizeof(u32));
 }
 
 #define VGIC_CFG_LEVEL	0
@@ -744,7 +790,7 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 */
 		vgic_dist_irq_set_pending(vcpu, lr.irq);
 		if (lr.irq < VGIC_NR_SGIS)
-			dist->irq_sgi_sources[vcpu_id][lr.irq] |= 1 << lr.source;
+			*vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
 		lr.state &= ~LR_STATE_PENDING;
 		vgic_set_lr(vcpu, i, lr);
 
@@ -778,7 +824,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 	/* Copy source SGIs from distributor side */
 	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 		int shift = 8 * (sgi - min_sgi);
-		reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+		reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
 	}
 
 	mmio_data_write(mmio, ~0, reg);
@@ -802,14 +848,15 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 	/* Clear pending SGIs on the distributor */
 	for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
 		u8 mask = reg >> (8 * (sgi - min_sgi));
+		u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
 		if (set) {
-			if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+			if ((*src & mask) != mask)
 				updated = true;
-			dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+			*src |= mask;
 		} else {
-			if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+			if (*src & mask)
 				updated = true;
-			dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+			*src &= ~mask;
 		}
 	}
 
@@ -993,6 +1040,11 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	return true;
 }
 
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+	return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -1026,7 +1078,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 		if (target_cpus & 1) {
 			/* Flag the SGI as pending */
 			vgic_dist_irq_set_pending(vcpu, sgi);
-			dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
+			*vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
 			kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
 		}
 
@@ -1073,14 +1125,14 @@ static void vgic_update_state(struct kvm *kvm)
 	int c;
 
 	if (!dist->enabled) {
-		set_bit(0, &dist->irq_pending_on_cpu);
+		set_bit(0, dist->irq_pending_on_cpu);
 		return;
 	}
 
 	kvm_for_each_vcpu(c, vcpu, kvm) {
 		if (compute_pending_for_cpu(vcpu)) {
 			pr_debug("CPU%d has pending interrupts\n", c);
-			set_bit(c, &dist->irq_pending_on_cpu);
+			set_bit(c, dist->irq_pending_on_cpu);
 		}
 	}
 }
@@ -1237,14 +1289,14 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 	int vcpu_id = vcpu->vcpu_id;
 	int c;
 
-	sources = dist->irq_sgi_sources[vcpu_id][irq];
+	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
 
 	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
 		if (vgic_queue_irq(vcpu, c, irq))
 			clear_bit(c, &sources);
 	}
 
-	dist->irq_sgi_sources[vcpu_id][irq] = sources;
+	*vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
 
 	/*
 	 * If the sources bitmap has been cleared it means that we
@@ -1332,7 +1384,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 		 * us. Claim we don't have anything pending. We'll
 		 * adjust that if needed while exiting.
 		 */
-		clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
+		clear_bit(vcpu_id, dist->irq_pending_on_cpu);
 	}
 }
 
@@ -1430,7 +1482,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	/* Check if we still have something up our sleeve... */
 	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
 	if (level_pending || pending < vgic->nr_lr)
-		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+		set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
@@ -1464,7 +1516,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 	if (!irqchip_in_kernel(vcpu->kvm))
 		return 0;
 
-	return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
+	return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
 static void vgic_kick_vcpus(struct kvm *kvm)
@@ -1559,7 +1611,7 @@ static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 
 	if (level) {
 		vgic_cpu_irq_set(vcpu, irq_num);
-		set_bit(cpuid, &dist->irq_pending_on_cpu);
+		set_bit(cpuid, dist->irq_pending_on_cpu);
 	}
 
 out:
@@ -1603,6 +1655,32 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	kfree(vgic_cpu->pending_shared);
+	kfree(vgic_cpu->vgic_irq_lr_map);
+	vgic_cpu->pending_shared = NULL;
+	vgic_cpu->vgic_irq_lr_map = NULL;
+}
+
+static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
+{
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+	vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+
+	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+		kvm_vgic_vcpu_destroy(vcpu);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /**
  * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
  * @vcpu: pointer to the vcpu struct
@@ -1642,6 +1720,97 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vgic_vcpu_destroy(vcpu);
+
+	vgic_free_bitmap(&dist->irq_enabled);
+	vgic_free_bitmap(&dist->irq_level);
+	vgic_free_bitmap(&dist->irq_pending);
+	vgic_free_bitmap(&dist->irq_soft_pend);
+	vgic_free_bitmap(&dist->irq_queued);
+	vgic_free_bitmap(&dist->irq_cfg);
+	vgic_free_bytemap(&dist->irq_priority);
+	if (dist->irq_spi_target) {
+		for (i = 0; i < dist->nr_cpus; i++)
+			vgic_free_bitmap(&dist->irq_spi_target[i]);
+	}
+	kfree(dist->irq_sgi_sources);
+	kfree(dist->irq_spi_cpu);
+	kfree(dist->irq_spi_target);
+	kfree(dist->irq_pending_on_cpu);
+	dist->irq_sgi_sources = NULL;
+	dist->irq_spi_cpu = NULL;
+	dist->irq_spi_target = NULL;
+	dist->irq_pending_on_cpu = NULL;
+}
+
+/*
+ * Allocate and initialize the various data structures. Must be called
+ * with kvm->lock held!
+ */
+static int vgic_init_maps(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct kvm_vcpu *vcpu;
+	int nr_cpus, nr_irqs;
+	int ret, i;
+
+	nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS;
+	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
+
+	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+	ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
+	ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
+
+	if (ret)
+		goto out;
+
+	dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
+	dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
+	dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
+				       GFP_KERNEL);
+	dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+					   GFP_KERNEL);
+	if (!dist->irq_sgi_sources ||
+	    !dist->irq_spi_cpu ||
+	    !dist->irq_spi_target ||
+	    !dist->irq_pending_on_cpu) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
+					nr_cpus, nr_irqs);
+
+	if (ret)
+		goto out;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
+		if (ret) {
+			kvm_err("VGIC: Failed to allocate vcpu memory\n");
+			break;
+		}
+	}
+
+out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
+
+	return ret;
+}
+
 /**
  * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
@@ -1722,6 +1891,10 @@ int kvm_vgic_create(struct kvm *kvm)
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
+	ret = vgic_init_maps(kvm);
+	if (ret)
+		kvm_err("Unable to allocate maps\n");
+
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);

From ac2409584bc7bb3b8cff23884e65cf341860d674 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:02 +0100
Subject: [PATCH 270/287] arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS

Having a dynamic number of supported interrupts means that we
cannot relly on VGIC_NR_SHARED_IRQS being fixed anymore.

Instead, make it take the distributor structure as a parameter,
so it can return the right value.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit fb65ab63b8cae510ea1e43e68b5da2f9980aa6d5)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic.c    | 16 +++++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index fd1b8f252da1..b2f9936df319 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,7 +29,6 @@
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS	(VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
 #define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 102dde58c549..a24fdacf5381 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1086,11 +1086,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 	}
 }
 
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+	return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
 	unsigned long pending_private, pending_shared;
+	int nr_shared = vgic_nr_shared_irqs(dist);
 	int vcpu_id;
 
 	vcpu_id = vcpu->vcpu_id;
@@ -1103,15 +1109,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 
 	pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
 	enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
-	bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
+	bitmap_and(pend_shared, pending, enabled, nr_shared);
 	bitmap_and(pend_shared, pend_shared,
 		   vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
-		   VGIC_NR_SHARED_IRQS);
+		   nr_shared);
 
 	pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
-	pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
+	pending_shared = find_first_bit(pend_shared, nr_shared);
 	return (pending_private < VGIC_NR_PRIVATE_IRQS ||
-		pending_shared < VGIC_NR_SHARED_IRQS);
+		pending_shared < vgic_nr_shared_irqs(dist));
 }
 
 /*
@@ -1368,7 +1374,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* SPIs */
-	for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
+	for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
 		if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
 			overflow = 1;
 	}

From 3e0fb55b4af81f2364b1969bdbf31bf67883a4d5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:03 +0100
Subject: [PATCH 271/287] arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS

We now have the information about the number of CPU interfaces in
the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just
rely on KVM_MAX_VCPUS where we don't have the choice. Yet.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit fc675e355e705a046df7b635d3f3330c0ad94569)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 3 +--
 virt/kvm/arm/vgic.c    | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index b2f9936df319..3b73d7845124 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -29,13 +29,12 @@
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_MAX_CPUS		KVM_MAX_VCPUS
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
 #define VGIC_V3_MAX_LRS		16
 
 /* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
+#if (KVM_MAX_VCPUS > 8)
 #error	Invalid number of CPU interfaces
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index a24fdacf5381..df0700bd0f23 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1297,7 +1297,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 
 	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
 
-	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
+	for_each_set_bit(c, &sources, dist->nr_cpus) {
 		if (vgic_queue_irq(vcpu, c, irq))
 			clear_bit(c, &sources);
 	}
@@ -1700,7 +1700,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
+	if (vcpu->vcpu_id >= dist->nr_cpus)
 		return -EBUSY;
 
 	for (i = 0; i < VGIC_NR_IRQS; i++) {
@@ -1767,7 +1767,7 @@ static int vgic_init_maps(struct kvm *kvm)
 	int nr_cpus, nr_irqs;
 	int ret, i;
 
-	nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS;
+	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
 	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
 
 	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);

From 35a089f06fa6d1f02265be8b1e31e3775003d5c8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:04 +0100
Subject: [PATCH 272/287] arm/arm64: KVM: vgic: handle out-of-range MMIO
 accesses

Now that we can (almost) dynamically size the number of interrupts,
we're facing an interesting issue:

We have to evaluate at runtime whether or not an access hits a valid
register, based on the sizing of this particular instance of the
distributor. Furthermore, the GIC spec says that accessing a reserved
register is RAZ/WI.

For this, add a new field to our range structure, indicating the number
of bits a single interrupts uses. That allows us to find out whether or
not the access is in range.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit c3c918361adcceb816c92b21dd95d2b46fb96a8f)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  3 ++-
 virt/kvm/arm/vgic.c    | 56 +++++++++++++++++++++++++++++++++---------
 2 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3b73d7845124..2767f939f47c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -32,6 +32,7 @@
 
 #define VGIC_V2_MAX_LRS		(1 << 6)
 #define VGIC_V3_MAX_LRS		16
+#define VGIC_MAX_IRQS		1024
 
 /* Sanity checks... */
 #if (KVM_MAX_VCPUS > 8)
@@ -42,7 +43,7 @@
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > 1024)
+#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index df0700bd0f23..de975c908301 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -895,6 +895,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
 struct mmio_range {
 	phys_addr_t base;
 	unsigned long len;
+	int bits_per_irq;
 	bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
 			    phys_addr_t offset);
 };
@@ -903,56 +904,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
 	{
 		.base		= GIC_DIST_CTRL,
 		.len		= 12,
+		.bits_per_irq	= 0,
 		.handle_mmio	= handle_mmio_misc,
 	},
 	{
 		.base		= GIC_DIST_IGROUP,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_ENABLE_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_set_enable_reg,
 	},
 	{
 		.base		= GIC_DIST_ENABLE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_clear_enable_reg,
 	},
 	{
 		.base		= GIC_DIST_PENDING_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_set_pending_reg,
 	},
 	{
 		.base		= GIC_DIST_PENDING_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_clear_pending_reg,
 	},
 	{
 		.base		= GIC_DIST_ACTIVE_SET,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_ACTIVE_CLEAR,
-		.len		= VGIC_NR_IRQS / 8,
+		.len		= VGIC_MAX_IRQS / 8,
+		.bits_per_irq	= 1,
 		.handle_mmio	= handle_mmio_raz_wi,
 	},
 	{
 		.base		= GIC_DIST_PRI,
-		.len		= VGIC_NR_IRQS,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
 		.handle_mmio	= handle_mmio_priority_reg,
 	},
 	{
 		.base		= GIC_DIST_TARGET,
-		.len		= VGIC_NR_IRQS,
+		.len		= VGIC_MAX_IRQS,
+		.bits_per_irq	= 8,
 		.handle_mmio	= handle_mmio_target_reg,
 	},
 	{
 		.base		= GIC_DIST_CONFIG,
-		.len		= VGIC_NR_IRQS / 4,
+		.len		= VGIC_MAX_IRQS / 4,
+		.bits_per_irq	= 2,
 		.handle_mmio	= handle_mmio_cfg_reg,
 	},
 	{
@@ -990,6 +1002,22 @@ struct mmio_range *find_matching_range(const struct mmio_range *ranges,
 	return NULL;
 }
 
+static bool vgic_validate_access(const struct vgic_dist *dist,
+				 const struct mmio_range *range,
+				 unsigned long offset)
+{
+	int irq;
+
+	if (!range->bits_per_irq)
+		return true;	/* Not an irq-based access */
+
+	irq = offset * 8 / range->bits_per_irq;
+	if (irq >= dist->nr_irqs)
+		return false;
+
+	return true;
+}
+
 /**
  * vgic_handle_mmio - handle an in-kernel MMIO access
  * @vcpu:	pointer to the vcpu performing the access
@@ -1029,7 +1057,13 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 	spin_lock(&vcpu->kvm->arch.vgic.lock);
 	offset = mmio->phys_addr - range->base - base;
-	updated_state = range->handle_mmio(vcpu, mmio, offset);
+	if (vgic_validate_access(dist, range, offset)) {
+		updated_state = range->handle_mmio(vcpu, mmio, offset);
+	} else {
+		vgic_reg_access(mmio, NULL, offset,
+				ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+		updated_state = false;
+	}
 	spin_unlock(&vcpu->kvm->arch.vgic.lock);
 	kvm_prepare_mmio(run, mmio);
 	kvm_handle_mmio_return(vcpu, run);

From ddfab003a2dbff1e226a24860e8f4f91a7d3d131 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:05 +0100
Subject: [PATCH 273/287] arm/arm64: KVM: vgic: kill VGIC_NR_IRQS

Nuke VGIC_NR_IRQS entierly, now that the distributor instance
contains the number of IRQ allocated to this GIC.

Also add VGIC_NR_IRQS_LEGACY to preserve the current API.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 5fb66da64064d0cb8dcce4cc8bf4cb1b921b13a0)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h |  6 +++---
 virt/kvm/arm/vgic.c    | 17 +++++++++++------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 2767f939f47c..aa20d4a7242f 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-#define VGIC_NR_IRQS		256
+#define VGIC_NR_IRQS_LEGACY	256
 #define VGIC_NR_SGIS		16
 #define VGIC_NR_PPIS		16
 #define VGIC_NR_PRIVATE_IRQS	(VGIC_NR_SGIS + VGIC_NR_PPIS)
@@ -39,11 +39,11 @@
 #error	Invalid number of CPU interfaces
 #endif
 
-#if (VGIC_NR_IRQS & 31)
+#if (VGIC_NR_IRQS_LEGACY & 31)
 #error "VGIC_NR_IRQS must be a multiple of 32"
 #endif
 
-#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
 #error "VGIC_NR_IRQS must be <= 1024"
 #endif
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index de975c908301..49501bbc2709 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -439,7 +439,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
 
 	case 4:			/* GICD_TYPER */
 		reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
-		reg |= (VGIC_NR_IRQS >> 5) - 1;
+		reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
 		vgic_reg_access(mmio, &reg, word_offset,
 				ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
 		break;
@@ -1277,13 +1277,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_lr vlr;
 	int lr;
 
 	/* Sanitize the input... */
 	BUG_ON(sgi_source_id & ~7);
 	BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
-	BUG_ON(irq >= VGIC_NR_IRQS);
+	BUG_ON(irq >= dist->nr_irqs);
 
 	kvm_debug("Queue IRQ%d\n", irq);
 
@@ -1515,7 +1516,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 
 		vlr = vgic_get_lr(vcpu, lr);
 
-		BUG_ON(vlr.irq >= VGIC_NR_IRQS);
+		BUG_ON(vlr.irq >= dist->nr_irqs);
 		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
 	}
 
@@ -1737,7 +1738,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	if (vcpu->vcpu_id >= dist->nr_cpus)
 		return -EBUSY;
 
-	for (i = 0; i < VGIC_NR_IRQS; i++) {
+	for (i = 0; i < dist->nr_irqs; i++) {
 		if (i < VGIC_NR_PPIS)
 			vgic_bitmap_set_irq_val(&dist->irq_enabled,
 						vcpu->vcpu_id, i, 1);
@@ -1802,7 +1803,11 @@ static int vgic_init_maps(struct kvm *kvm)
 	int ret, i;
 
 	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
-	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
+
+	if (!dist->nr_irqs)
+		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
+
+	nr_irqs = dist->nr_irqs;
 
 	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
 	ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
@@ -1886,7 +1891,7 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
+	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
 		vgic_set_target_reg(kvm, 0, i);
 
 	kvm->arch.vgic.ready = true;

From b9ca28a414c2e5dfb707bcba625eb76f334c787f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:06 +0100
Subject: [PATCH 274/287] arm/arm64: KVM: vgic: delay vgic allocation until
 init time

It is now quite easy to delay the allocation of the vgic tables
until we actually require it to be up and running (when the first
vcpu is kicking around, or someones tries to access the GIC registers).

This allow us to allocate memory for the exact number of CPUs we
have. As nobody configures the number of interrupts just yet,
use a fallback to VGIC_NR_IRQS_LEGACY.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit 4956f2bc1fdee4bc336532f3f34635a8534cedfd)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c     |  7 -------
 include/kvm/arm_vgic.h |  1 -
 virt/kvm/arm/vgic.c    | 42 +++++++++++++++++++++++++++++-------------
 3 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e374158363a..072a2084005c 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -261,16 +261,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	int ret;
-
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
 
-	/* Set up VGIC */
-	ret = kvm_vgic_vcpu_init(vcpu);
-	if (ret)
-		return ret;
-
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index aa20d4a7242f..2f2aac8448a4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -277,7 +277,6 @@ int kvm_vgic_hyp_init(void);
 int kvm_vgic_init(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 49501bbc2709..e7bca4bb7fd1 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1729,15 +1729,12 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
  * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
  * this vcpu and enable the VGIC for this VCPU
  */
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int i;
 
-	if (vcpu->vcpu_id >= dist->nr_cpus)
-		return -EBUSY;
-
 	for (i = 0; i < dist->nr_irqs; i++) {
 		if (i < VGIC_NR_PPIS)
 			vgic_bitmap_set_irq_val(&dist->irq_enabled,
@@ -1757,8 +1754,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	vgic_cpu->nr_lr = vgic->nr_lr;
 
 	vgic_enable(vcpu);
-
-	return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1802,8 +1797,17 @@ static int vgic_init_maps(struct kvm *kvm)
 	int nr_cpus, nr_irqs;
 	int ret, i;
 
-	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
+	if (dist->nr_cpus)	/* Already allocated */
+		return 0;
 
+	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+	if (!nr_cpus)		/* No vcpus? Can't be good... */
+		return -EINVAL;
+
+	/*
+	 * If nobody configured the number of interrupts, use the
+	 * legacy one.
+	 */
 	if (!dist->nr_irqs)
 		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
 
@@ -1849,6 +1853,9 @@ static int vgic_init_maps(struct kvm *kvm)
 		}
 	}
 
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
 out:
 	if (ret)
 		kvm_vgic_destroy(kvm);
@@ -1867,6 +1874,7 @@ static int vgic_init_maps(struct kvm *kvm)
  */
 int kvm_vgic_init(struct kvm *kvm)
 {
+	struct kvm_vcpu *vcpu;
 	int ret = 0, i;
 
 	if (!irqchip_in_kernel(kvm))
@@ -1884,6 +1892,12 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
+	ret = vgic_init_maps(kvm);
+	if (ret) {
+		kvm_err("Unable to allocate maps\n");
+		goto out;
+	}
+
 	ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
 				    vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
 	if (ret) {
@@ -1891,11 +1905,13 @@ int kvm_vgic_init(struct kvm *kvm)
 		goto out;
 	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_vgic_vcpu_init(vcpu);
 
 	kvm->arch.vgic.ready = true;
 out:
+	if (ret)
+		kvm_vgic_destroy(kvm);
 	mutex_unlock(&kvm->lock);
 	return ret;
 }
@@ -1936,10 +1952,6 @@ int kvm_vgic_create(struct kvm *kvm)
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 
-	ret = vgic_init_maps(kvm);
-	if (ret)
-		kvm_err("Unable to allocate maps\n");
-
 out_unlock:
 	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
@@ -2140,6 +2152,10 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->lock);
 
+	ret = vgic_init_maps(dev->kvm);
+	if (ret)
+		goto out;
+
 	if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
 		ret = -EINVAL;
 		goto out;

From e1fde0a1e746c70d026c1388c9d1c0bce278ae01 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 8 Jul 2014 12:09:07 +0100
Subject: [PATCH 275/287] arm/arm64: KVM: vgic: make number of irqs a
 configurable attribute

In order to make the number of interrupts configurable, use the new
fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as
a VGIC configurable attribute.

Userspace can now specify the exact size of the GIC (by increments
of 32 interrupts).

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
(cherry picked from commit a98f26f183801685ef57333de4bafd4bbc692c7c)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 .../virtual/kvm/devices/arm-vgic.txt          | 10 +++++
 arch/arm/include/uapi/asm/kvm.h               |  1 +
 arch/arm64/include/uapi/asm/kvm.h             |  1 +
 virt/kvm/arm/vgic.c                           | 37 +++++++++++++++++++
 4 files changed, 49 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 7f4e91b1316b..df8b0c7540b6 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -71,3 +71,13 @@ Groups:
   Errors:
     -ENODEV: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
+
+  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+  Attributes:
+    A value describing the number of interrupts (SGI, PPI and SPI) for
+    this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+  Errors:
+    -EINVAL: Value set is out of the expected range
+    -EBUSY: Value has already be set, or GIC has already been initialized
+            with default values.
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 51257fda254b..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -174,6 +174,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f4ec5a674d05..8e38878c87c6 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -160,6 +160,7 @@ struct kvm_arch_memory_slot {
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS	3
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e7bca4bb7fd1..43b56c696752 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2253,6 +2253,36 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
 		return vgic_attr_regs_access(dev, attr, &reg, true);
 	}
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		u32 val;
+		int ret = 0;
+
+		if (get_user(val, uaddr))
+			return -EFAULT;
+
+		/*
+		 * We require:
+		 * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+		 * - at most 1024 interrupts
+		 * - a multiple of 32 interrupts
+		 */
+		if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+		    val > VGIC_MAX_IRQS ||
+		    (val & 31))
+			return -EINVAL;
+
+		mutex_lock(&dev->kvm->lock);
+
+		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+			ret = -EBUSY;
+		else
+			dev->kvm->arch.vgic.nr_irqs = val;
+
+		mutex_unlock(&dev->kvm->lock);
+
+		return ret;
+	}
 
 	}
 
@@ -2289,6 +2319,11 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 		r = put_user(reg, uaddr);
 		break;
 	}
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+		break;
+	}
 
 	}
 
@@ -2325,6 +2360,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 	case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
 		offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
 		return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+		return 0;
 	}
 	return -ENXIO;
 }

From b6c20297368fc6c782c0e5709f0d261bfdee2f6e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 25 Sep 2014 18:41:07 +0200
Subject: [PATCH 276/287] arm/arm64: KVM: Fix set_clear_sgi_pend_reg offset

The sgi values calculated in read_set_clear_sgi_pend_reg() and
write_set_clear_sgi_pend_reg() were horribly incorrectly multiplied by 4
with catastrophic results in that subfunctions ended up overwriting
memory not allocated for the expected purpose.

This showed up as bugs in kfree() and the kernel complaining a lot of
you turn on memory debugging.

This addresses: http://marc.info/?l=kvm&m=141164910007868&w=2

Reported-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0fea6d7628ed6e25a9ee1b67edf7c859718d39e8)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 43b56c696752..506693152e47 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -816,7 +816,7 @@ static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int sgi;
-	int min_sgi = (offset & ~0x3) * 4;
+	int min_sgi = (offset & ~0x3);
 	int max_sgi = min_sgi + 3;
 	int vcpu_id = vcpu->vcpu_id;
 	u32 reg = 0;
@@ -837,7 +837,7 @@ static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	int sgi;
-	int min_sgi = (offset & ~0x3) * 4;
+	int min_sgi = (offset & ~0x3);
 	int max_sgi = min_sgi + 3;
 	int vcpu_id = vcpu->vcpu_id;
 	u32 reg;

From 17434ac66579bb31d6a129edb7dec7989b0a7a37 Mon Sep 17 00:00:00 2001
From: Joel Schopp <joel.schopp@amd.com>
Date: Wed, 9 Jul 2014 11:17:04 -0500
Subject: [PATCH 277/287] arm/arm64: KVM: Fix VTTBR_BADDR_MASK and pgd alloc

The current aarch64 calculation for VTTBR_BADDR_MASK masks only 39 bits
and not all the bits in the PA range. This is clearly a bug that
manifests itself on systems that allocate memory in the higher address
space range.

 [ Modified from Joel's original patch to be based on PHYS_MASK_SHIFT
   instead of a hard-coded value and to move the alignment check of the
   allocation to mmu.c.  Also added a comment explaining why we hardcode
   the IPA range and changed the stage-2 pgd allocation to be based on
   the 40 bit IPA range instead of the maximum possible 48 bit PA range.
   - Christoffer ]

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Joel Schopp <joel.schopp@amd.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit dbff124e29fa24aff9705b354b5f4648cd96e0bb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c               |  4 ++--
 arch/arm64/include/asm/kvm_arm.h | 13 ++++++++++++-
 arch/arm64/include/asm/kvm_mmu.h |  5 ++---
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 072a2084005c..15111ca6fe30 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -410,9 +410,9 @@ static void update_vttbr(struct kvm *kvm)
 
 	/* update vttbr to be used with the new vmid */
 	pgd_phys = virt_to_phys(kvm->arch.pgd);
+	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
-	kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
-	kvm->arch.vttbr |= vmid;
+	kvm->arch.vttbr = pgd_phys | vmid;
 
 	spin_unlock(&kvm_vmid_lock);
 }
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cc83520459ed..7fd3e27e3ccc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -122,6 +122,17 @@
 #define VTCR_EL2_T0SZ_MASK	0x3f
 #define VTCR_EL2_T0SZ_40B	24
 
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
@@ -149,7 +160,7 @@
 #endif
 
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_BADDR_MASK  (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (48LLU)
 #define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 737da742b293..a030d163840b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -59,10 +59,9 @@
 #define KERN_TO_HYP(kva)	((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
 
 /*
- * Align KVM with the kernel's view of physical memory. Should be
- * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration.
+ * We currently only support a 40bit IPA.
  */
-#define KVM_PHYS_SHIFT	PHYS_MASK_SHIFT
+#define KVM_PHYS_SHIFT	(40)
 #define KVM_PHYS_SIZE	(1UL << KVM_PHYS_SHIFT)
 #define KVM_PHYS_MASK	(KVM_PHYS_SIZE - 1UL)
 

From 7f79ef21e90a97b535ca0f181929754cd72bda10 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 26 Sep 2014 12:29:34 +0200
Subject: [PATCH 278/287] arm/arm64: KVM: Report correct FSC for unsupported
 fault types

When we catch something that's not a permission fault or a translation
fault, we log the unsupported FSC in the kernel log, but we were masking
off the bottom bits of the FSC which was not very helpful.

Also correctly report the FSC for data and instruction faults rather
than telling people it was a DFCS, which doesn't exist in the ARM ARM.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 0496daa5cf99741ce8db82686b4c7446a37feabb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_emulate.h   | 5 +++++
 arch/arm/kvm/mmu.c                   | 8 +++++---
 arch/arm64/include/asm/kvm_emulate.h | 5 +++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 69b746955fca..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -148,6 +148,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
 }
 
 static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
 }
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index bb06f76a8f89..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -882,10 +882,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
 	/* Check the stage-2 fault is trans. fault or write fault */
-	fault_status = kvm_vcpu_trap_get_fault(vcpu);
+	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
 	if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
-		kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
-			kvm_vcpu_trap_get_class(vcpu), fault_status);
+		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+			kvm_vcpu_trap_get_class(vcpu),
+			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+			(unsigned long)kvm_vcpu_get_hsr(vcpu));
 		return -EFAULT;
 	}
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index fdc3e21abd8d..5674a55b5518 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -173,6 +173,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
 }
 
 static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
 }

From 2dc5e2cf90915059fbc39ef380605aef6adaef47 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Mon, 22 Sep 2014 15:52:48 +0100
Subject: [PATCH 279/287] arm: kvm: fix CPU hotplug

On some platforms with no power management capabilities, the hotplug
implementation is allowed to return from a smp_ops.cpu_die() call as a
function return. Upon a CPU onlining event, the KVM CPU notifier tries
to reinstall the hyp stub, which fails on platform where no reset took
place following a hotplug event, with the message:

CPU1: smp_ops.cpu_die() returned, trying to resuscitate
CPU1: Booted secondary processor
Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x80409540
unexpected data abort in Hyp mode at: 0x80401fe8
unexpected HVC/SVC trap in Hyp mode at: 0x805c6170

since KVM code is trying to reinstall the stub on a system where it is
already configured.

To prevent this issue, this patch adds a check in the KVM hotplug
notifier that detects if the HYP stub really needs re-installing when a
CPU is onlined and skips the installation call if the stub is already in
place, which means that the CPU has not been reset.

Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
(cherry picked from commit 37a34ac1d4775aafbc73b9db53c7daebbbc67e6a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 15111ca6fe30..d0c8ee654bbf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -808,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_STARTING:
 	case CPU_STARTING_FROZEN:
-		cpu_init_hyp_mode(NULL);
+		if (__hyp_get_vectors() == hyp_default_vectors)
+			cpu_init_hyp_mode(NULL);
 		break;
 	}
 

From ad4ef3e73e40166a585d71f36a521f9f6d5db3c4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 2 Oct 2014 11:53:28 +0200
Subject: [PATCH 280/287] Revert "arm, kvm: fix double lock on
 cpu_add_remove_lock"

This reverts commit d77503eadd2f16f2900b9be79a1dc6f37e8cd579.  The whole
register cpu hotplug fix series has not been applied, so LSK is released
without this fix.  If we ever include that series in LSK later, then
this can be fixed later too.

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
(cherry picked from commit 553f809e23f00976caea7a1ebdabaa58a6383e7d)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 2 +-
 virt/kvm/arm/vgic.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819a9b6a..5081e809821f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void)
 
 	host_vtimer_irq = ppi;
 
-	err = __register_cpu_notifier(&kvm_timer_cpu_nb);
+	err = register_cpu_notifier(&kvm_timer_cpu_nb);
 	if (err) {
 		kvm_err("Cannot register timer CPU notifier\n");
 		goto out_free;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 506693152e47..8e1dc03342c3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2444,7 +2444,7 @@ int kvm_vgic_hyp_init(void)
 		return ret;
 	}
 
-	ret = __register_cpu_notifier(&vgic_cpu_nb);
+	ret = register_cpu_notifier(&vgic_cpu_nb);
 	if (ret) {
 		kvm_err("Cannot register vgic CPU notifier\n");
 		goto out_free_irq;

From 7ebda0194ff1872ef36d93cf309cc4043ffba4ee Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 3 Jul 2013 15:02:11 -0700
Subject: [PATCH 281/287] include/linux/mm.h: add PAGE_ALIGNED() helper

To test whether an address is aligned to PAGE_SIZE.

Cc: HATAYAMA Daisuke <d.hatayama@jp.fujitsu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>,
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 0fa73b86ef0797ca4fde5334117ca0b330f08030)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/linux/mm.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e0c8528a41a4..f42c5baa47cc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -52,6 +52,9 @@ extern unsigned long sysctl_admin_reserve_kbytes;
 /* to align the pointer to the (next) page boundary */
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
 
+/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
+#define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)addr, PAGE_SIZE)
+
 /*
  * Linux kernel virtual memory manager primitives.
  * The idea being to have a "virtual" mm in the same way

From 8032ebeefc762abdcc8f14f08e9e1d3839996063 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Tue, 25 Feb 2014 08:41:09 +0100
Subject: [PATCH 282/287] ARM: 7990/1: asm: rename logical shift macros push
 pull into lspush lspull

Renames logical shift macros, 'push' and 'pull', defined in
arch/arm/include/asm/assembler.h, into 'lspush' and 'lspull'.
That eliminates name conflict between 'push' logical shift macro
and 'push' instruction mnemonic. That allows assembler.h to be
included in .S files that use 'push' instruction.

Suggested-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
(cherry picked from commit d98b90ea22b0a28d9d787769704a9cf1ea5a513a)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/assembler.h      |   8 +-
 arch/arm/lib/copy_template.S          |  36 ++---
 arch/arm/lib/csumpartialcopygeneric.S |  96 ++++++-------
 arch/arm/lib/io-readsl.S              |  12 +-
 arch/arm/lib/io-writesl.S             |  12 +-
 arch/arm/lib/memmove.S                |  36 ++---
 arch/arm/lib/uaccess.S                | 192 +++++++++++++-------------
 7 files changed, 196 insertions(+), 196 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 05ee9eebad6b..9271457adac8 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -30,8 +30,8 @@
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull            lsr
-#define push            lsl
+#define lspull          lsr
+#define lspush          lsl
 #define get_byte_0      lsl #0
 #define get_byte_1	lsr #8
 #define get_byte_2	lsr #16
@@ -41,8 +41,8 @@
 #define put_byte_2	lsl #16
 #define put_byte_3	lsl #24
 #else
-#define pull            lsl
-#define push            lsr
+#define lspull          lsl
+#define lspush          lsr
 #define get_byte_0	lsr #24
 #define get_byte_1	lsr #16
 #define get_byte_2	lsr #8
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..3bc8eb811a73 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, lspull #\pull
 		subs	r2, r2, #32
 		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, lspush #\push
+		mov	r4, r4, lspull #\pull
+		orr	r4, r4, r5, lspush #\push
+		mov	r5, r5, lspull #\pull
+		orr	r5, r5, r6, lspush #\push
+		mov	r6, r6, lspull #\pull
+		orr	r6, r6, r7, lspush #\push
+		mov	r7, r7, lspull #\pull
+		orr	r7, r7, r8, lspush #\push
+		mov	r8, r8, lspull #\pull
+		orr	r8, r8, r9, lspush #\push
+		mov	r9, r9, lspull #\pull
+		orr	r9, r9, ip, lspush #\push
+		mov	ip, ip, lspull #\pull
+		orr	ip, ip, lr, lspush #\push
 		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -225,10 +225,10 @@
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, lspull #\pull
 		ldr1w	r1, lr, abort=21f
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, lspush #\push
 		str1w	r0, r3, abort=21f
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..d6e742d24007 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@ FN_ENTRY
 		tst	len, #2
 		mov	r5, r4, get_byte_0
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
@@ -171,23 +171,23 @@ FN_ENTRY
 		cmp	ip, #2
 		beq	.Lsrc2_aligned
 		bhi	.Lsrc3_aligned
-		mov	r4, r5, pull #8		@ C = 0
+		mov	r4, r5, lspull #8		@ C = 0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
-		mov	r7, r7, pull #8
-		orr	r7, r7, r8, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
+		mov	r7, r7, lspull #8
+		orr	r7, r7, r8, lspush #24
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #8
+		mov	r4, r8, lspull #8
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -196,50 +196,50 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #8
+		mov	r4, r6, lspull #8
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #24
+		orr	r4, r4, r5, lspush #24
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #8
+		mov	r4, r5, lspull #8
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
 		tst	len, #2
 		beq	.Lexit
-		adcs	sum, sum, r4, push #16
+		adcs	sum, sum, r4, lspush #16
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_1
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
 		b	.Lexit
 
-.Lsrc2_aligned:	mov	r4, r5, pull #16
+.Lsrc2_aligned:	mov	r4, r5, lspull #16
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
-		mov	r7, r7, pull #16
-		orr	r7, r7, r8, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
+		mov	r7, r7, lspull #16
+		orr	r7, r7, r8, lspush #16
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #16
+		mov	r4, r8, lspull #16
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -248,20 +248,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #16
+		mov	r4, r6, lspull #16
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #16
+		orr	r4, r4, r5, lspush #16
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #16
+		mov	r4, r5, lspull #16
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -276,24 +276,24 @@ FN_ENTRY
 		load1b	r5
 		b	.Lexit
 
-.Lsrc3_aligned:	mov	r4, r5, pull #24
+.Lsrc3_aligned:	mov	r4, r5, lspull #24
 		adds	sum, sum, #0
 		bics	ip, len, #15
 		beq	2f
 1:		load4l	r5, r6, r7, r8
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
-		mov	r7, r7, pull #24
-		orr	r7, r7, r8, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
+		mov	r7, r7, lspull #24
+		orr	r7, r7, r8, lspush #8
 		stmia	dst!, {r4, r5, r6, r7}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
 		adcs	sum, sum, r6
 		adcs	sum, sum, r7
-		mov	r4, r8, pull #24
+		mov	r4, r8, lspull #24
 		sub	ip, ip, #16
 		teq	ip, #0
 		bne	1b
@@ -302,20 +302,20 @@ FN_ENTRY
 		tst	ip, #8
 		beq	3f
 		load2l	r5, r6
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
 		stmia	dst!, {r4, r5}
 		adcs	sum, sum, r4
 		adcs	sum, sum, r5
-		mov	r4, r6, pull #24
+		mov	r4, r6, lspull #24
 		tst	ip, #4
 		beq	4f
 3:		load1l	r5
-		orr	r4, r4, r5, push #8
+		orr	r4, r4, r5, lspush #8
 		str	r4, [dst], #4
 		adcs	sum, sum, r4
-		mov	r4, r5, pull #24
+		mov	r4, r5, lspull #24
 4:		ands	len, len, #3
 		beq	.Ldone
 		mov	r5, r4, get_byte_0
@@ -326,7 +326,7 @@ FN_ENTRY
 		load1l	r4
 		mov	r5, r4, get_byte_0
 		strb	r5, [dst], #1
-		adcs	sum, sum, r4, push #24
+		adcs	sum, sum, r4, lspush #24
 		mov	r5, r4, get_byte_1
 		b	.Lexit
 FN_EXIT
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..7a7430950c79 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
 		strb	ip, [r1], #1
 
 4:		subs	r2, r2, #1
-		mov	ip, r3, pull #24
+		mov	ip, r3, lspull #24
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #8
+		orrne	ip, ip, r3, lspush #8
 		strne	ip, [r1], #4
 		bne	4b
 		b	8f
 
 5:		subs	r2, r2, #1
-		mov	ip, r3, pull #16
+		mov	ip, r3, lspull #16
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #16
+		orrne	ip, ip, r3, lspush #16
 		strne	ip, [r1], #4
 		bne	5b
 		b	7f
 
 6:		subs	r2, r2, #1
-		mov	ip, r3, pull #8
+		mov	ip, r3, lspull #8
 		ldrne	r3, [r0]
-		orrne	ip, ip, r3, push #24
+		orrne	ip, ip, r3, lspush #24
 		strne	ip, [r1], #4
 		bne	6b
 
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..d0d104a0dd11 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@ ENTRY(__raw_writesl)
 		blt	5f
 		bgt	6f
 
-4:		mov	ip, r3, pull #16
+4:		mov	ip, r3, lspull #16
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #16
+		orr	ip, ip, r3, lspush #16
 		str	ip, [r0]
 		bne	4b
 		mov	pc, lr
 
-5:		mov	ip, r3, pull #8
+5:		mov	ip, r3, lspull #8
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #24
+		orr	ip, ip, r3, lspush #24
 		str	ip, [r0]
 		bne	5b
 		mov	pc, lr
 
-6:		mov	ip, r3, pull #24
+6:		mov	ip, r3, lspull #24
 		ldr	r3, [r1], #4
 		subs	r2, r2, #1
-		orr	ip, ip, r3, push #8
+		orr	ip, ip, r3, lspush #8
 		str	ip, [r0]
 		bne	6b
 		mov	pc, lr
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962d..d1fc0c0c342c 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@ ENTRY(memmove)
 
 12:	PLD(	pld	[r1, #-128]		)
 13:		ldmdb   r1!, {r7, r8, r9, ip}
-		mov     lr, r3, push #\push
+		mov     lr, r3, lspush #\push
 		subs    r2, r2, #32
 		ldmdb   r1!, {r3, r4, r5, r6}
-		orr     lr, lr, ip, pull #\pull
-		mov     ip, ip, push #\push
-		orr     ip, ip, r9, pull #\pull
-		mov     r9, r9, push #\push
-		orr     r9, r9, r8, pull #\pull
-		mov     r8, r8, push #\push
-		orr     r8, r8, r7, pull #\pull
-		mov     r7, r7, push #\push
-		orr     r7, r7, r6, pull #\pull
-		mov     r6, r6, push #\push
-		orr     r6, r6, r5, pull #\pull
-		mov     r5, r5, push #\push
-		orr     r5, r5, r4, pull #\pull
-		mov     r4, r4, push #\push
-		orr     r4, r4, r3, pull #\pull
+		orr     lr, lr, ip, lspull #\pull
+		mov     ip, ip, lspush #\push
+		orr     ip, ip, r9, lspull #\pull
+		mov     r9, r9, lspush #\push
+		orr     r9, r9, r8, lspull #\pull
+		mov     r8, r8, lspush #\push
+		orr     r8, r8, r7, lspull #\pull
+		mov     r7, r7, lspush #\push
+		orr     r7, r7, r6, lspull #\pull
+		mov     r6, r6, lspush #\push
+		orr     r6, r6, r5, lspull #\pull
+		mov     r5, r5, lspush #\push
+		orr     r5, r5, r4, lspull #\pull
+		mov     r4, r4, lspush #\push
+		orr     r4, r4, r3, lspull #\pull
 		stmdb   r0!, {r4 - r9, ip, lr}
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -175,10 +175,10 @@ ENTRY(memmove)
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov     lr, r3, push #\push
+15:		mov     lr, r3, lspush #\push
 		ldr	r3, [r1, #-4]!
 		subs	ip, ip, #4
-		orr	lr, lr, r3, pull #\pull
+		orr	lr, lr, r3, lspull #\pull
 		str	lr, [r0, #-4]!
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 5c908b1cb8ed..e50520904b76 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -117,9 +117,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -131,30 +131,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_1rem8lp
 
-.Lc2u_1cpy8lp:	mov	r3, r7, pull #8
+.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_1cpy8lp
 
 .Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_1fupi
@@ -172,9 +172,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -186,30 +186,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_2rem8lp
 
-.Lc2u_2cpy8lp:	mov	r3, r7, pull #16
+.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_2cpy8lp
 
 .Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_2fupi
@@ -227,9 +227,9 @@ USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
 .Lc2u_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lc2u_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 		ldr	r7, [r1], #4
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		mov	ip, r0, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -241,30 +241,30 @@ USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lc2u_3rem8lp
 
-.Lc2u_3cpy8lp:	mov	r3, r7, pull #24
+.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}			@ Shouldnt fault
 		bpl	.Lc2u_3cpy8lp
 
 .Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
 		ands	ip, ip, #3
 		beq	.Lc2u_3fupi
@@ -382,9 +382,9 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 .Lcfu_1fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_1nowords
-		mov	r3, r7, pull #8
+		mov	r3, r7, lspull #8
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #24
+		orr	r3, r3, r7, lspush #24
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -396,30 +396,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_1rem8lp
 
-.Lcfu_1cpy8lp:	mov	r3, r7, pull #8
+.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #24
-		mov	r4, r4, pull #8
-		orr	r4, r4, r5, push #24
-		mov	r5, r5, pull #8
-		orr	r5, r5, r6, push #24
-		mov	r6, r6, pull #8
-		orr	r6, r6, r7, push #24
+		orr	r3, r3, r4, lspush #24
+		mov	r4, r4, lspull #8
+		orr	r4, r4, r5, lspush #24
+		mov	r5, r5, lspull #8
+		orr	r5, r5, r6, lspush #24
+		mov	r6, r6, lspull #8
+		orr	r6, r6, r7, lspush #24
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_1cpy8lp
 
 .Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #24
-		movne	r4, r4, pull #8
-		orrne	r4, r4, r7, push #24
+		orrne	r3, r3, r4, lspush #24
+		movne	r4, r4, lspull #8
+		orrne	r4, r4, r7, lspush #24
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #8
+		movne	r3, r7, lspull #8
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #24
+		orrne	r3, r3, r7, lspush #24
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_1fupi
@@ -437,9 +437,9 @@ USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
 .Lcfu_2fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_2nowords
-		mov	r3, r7, pull #16
+		mov	r3, r7, lspull #16
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #16
+		orr	r3, r3, r7, lspush #16
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -452,30 +452,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		blt	.Lcfu_2rem8lp
 
 
-.Lcfu_2cpy8lp:	mov	r3, r7, pull #16
+.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
 		subs	ip, ip, #16
-		orr	r3, r3, r4, push #16
-		mov	r4, r4, pull #16
-		orr	r4, r4, r5, push #16
-		mov	r5, r5, pull #16
-		orr	r5, r5, r6, push #16
-		mov	r6, r6, pull #16
-		orr	r6, r6, r7, push #16
+		orr	r3, r3, r4, lspush #16
+		mov	r4, r4, lspull #16
+		orr	r4, r4, r5, lspush #16
+		mov	r5, r5, lspull #16
+		orr	r5, r5, r6, lspush #16
+		mov	r6, r6, lspull #16
+		orr	r6, r6, r7, lspush #16
 		stmia	r0!, {r3 - r6}
 		bpl	.Lcfu_2cpy8lp
 
 .Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #16
-		movne	r4, r4, pull #16
-		orrne	r4, r4, r7, push #16
+		orrne	r3, r3, r4, lspush #16
+		movne	r4, r4, lspull #16
+		orrne	r4, r4, r7, lspush #16
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #16
+		movne	r3, r7, lspull #16
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #16
+		orrne	r3, r3, r7, lspush #16
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_2fupi
@@ -493,9 +493,9 @@ USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault
 .Lcfu_3fupi:	subs	r2, r2, #4
 		addmi	ip, r2, #4
 		bmi	.Lcfu_3nowords
-		mov	r3, r7, pull #24
+		mov	r3, r7, lspull #24
 USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, push #8
+		orr	r3, r3, r7, lspush #8
 		str	r3, [r0], #4
 		mov	ip, r1, lsl #32 - PAGE_SHIFT
 		rsb	ip, ip, #0
@@ -507,30 +507,30 @@ USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
 		subs	ip, ip, #16
 		blt	.Lcfu_3rem8lp
 
-.Lcfu_3cpy8lp:	mov	r3, r7, pull #24
+.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24
 		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, push #8
-		mov	r4, r4, pull #24
-		orr	r4, r4, r5, push #8
-		mov	r5, r5, pull #24
-		orr	r5, r5, r6, push #8
-		mov	r6, r6, pull #24
-		orr	r6, r6, r7, push #8
+		orr	r3, r3, r4, lspush #8
+		mov	r4, r4, lspull #24
+		orr	r4, r4, r5, lspush #8
+		mov	r5, r5, lspull #24
+		orr	r5, r5, r6, lspush #8
+		mov	r6, r6, lspull #24
+		orr	r6, r6, r7, lspush #8
 		stmia	r0!, {r3 - r6}
 		subs	ip, ip, #16
 		bpl	.Lcfu_3cpy8lp
 
 .Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, push #8
-		movne	r4, r4, pull #24
-		orrne	r4, r4, r7, push #8
+		orrne	r3, r3, r4, lspush #8
+		movne	r4, r4, lspull #24
+		orrne	r4, r4, r7, lspush #8
 		stmneia	r0!, {r3 - r4}
 		tst	ip, #4
-		movne	r3, r7, pull #24
+		movne	r3, r7, lspull #24
 USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, push #8
+		orrne	r3, r3, r7, lspush #8
 		strne	r3, [r0], #4
 		ands	ip, ip, #3
 		beq	.Lcfu_3fupi

From 0db5306859a69c38c695400c34bde7b7df5f1806 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 10 May 2013 18:07:19 +0100
Subject: [PATCH 283/287] ARM: barrier: allow options to be passed to memory
 barrier instructions

On ARMv7, the memory barrier instructions take an optional `option'
field which can be used to constrain the effects of a memory barrier
based on shareability and access type.

This patch allows the caller to pass these options if required, and
updates the smp_*() barriers to request inner-shareable barriers,
affecting only stores for the _wmb variant. wmb() is also changed to
use the -st version of dsb.

Reported-by: Albin Tonnerre <albin.tonnerre@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 3ea128065ed20d33bd02ff6dab689f88e38000be)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/assembler.h |  4 ++--
 arch/arm/include/asm/barrier.h   | 32 ++++++++++++++++----------------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 9271457adac8..cab788045029 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -212,9 +212,9 @@
 #ifdef CONFIG_SMP
 #if __LINUX_ARM_ARCH__ >= 7
 	.ifeqs "\mode","arm"
-	ALT_SMP(dmb)
+	ALT_SMP(dmb	ish)
 	.else
-	ALT_SMP(W(dmb))
+	ALT_SMP(W(dmb)	ish)
 	.endif
 #elif __LINUX_ARM_ARCH__ == 6
 	ALT_SMP(mcr	p15, 0, r0, c7, c10, 5)	@ dmb
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 8dcd9c702d90..60f15e274e6d 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -14,27 +14,27 @@
 #endif
 
 #if __LINUX_ARM_ARCH__ >= 7
-#define isb() __asm__ __volatile__ ("isb" : : : "memory")
-#define dsb() __asm__ __volatile__ ("dsb" : : : "memory")
-#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
+#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory")
+#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
 #elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
+#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
 				    : : "r" (0) : "memory")
 #elif defined(CONFIG_CPU_FA526)
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
 				    : : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #else
-#define isb() __asm__ __volatile__ ("" : : : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define isb(x) __asm__ __volatile__ ("" : : : "memory")
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
 				    : : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
 #endif
 
 #ifdef CONFIG_ARCH_HAS_BARRIERS
@@ -42,7 +42,7 @@
 #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
 #define mb()		do { dsb(); outer_sync(); } while (0)
 #define rmb()		dsb()
-#define wmb()		mb()
+#define wmb()		do { dsb(st); outer_sync(); } while (0)
 #else
 #define mb()		barrier()
 #define rmb()		barrier()
@@ -54,9 +54,9 @@
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #else
-#define smp_mb()	dmb()
-#define smp_rmb()	dmb()
-#define smp_wmb()	dmb()
+#define smp_mb()	dmb(ish)
+#define smp_rmb()	smp_mb()
+#define smp_wmb()	dmb(ishst)
 #endif
 
 #define read_barrier_depends()		do { } while(0)

From 63d832d826b75fec2ed062e5a5dc107c5a5db158 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Wed, 31 Jul 2013 12:44:42 -0400
Subject: [PATCH 284/287] ARM: mm: Introduce virt_to_idmap() with an arch hook

On some PAE systems (e.g. TI Keystone), memory is above the
32-bit addressable limit, and the interconnect provides an
aliased view of parts of physical memory in the 32-bit addressable
space.  This alias is strictly for boot time usage, and is not
otherwise usable because of coherency limitations. On such systems,
the idmap mechanism needs to take this aliased mapping into account.

This patch introduces virt_to_idmap() and a arch function pointer which
can be populated by platform which needs it. Also populate necessary
idmap spots with now available virt_to_idmap(). Avoided #ifdef approach
to be compatible with multi-platform builds.

Most architecture won't touch it and in that case virt_to_idmap()
fall-back to existing virt_to_phys() macro.

Cc: Russell King <linux@arm.linux.org.uk>

Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
(cherry picked from commit 4dc9a81715973cb137a14399420bb35b0ed7d6ef)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/memory.h | 16 ++++++++++++++++
 arch/arm/kernel/smp.c         |  4 ++--
 arch/arm/mm/idmap.c           |  5 +++--
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 57870ab313c5..21b458e6b0b8 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -157,6 +157,7 @@
  */
 #define __PV_BITS_31_24	0x81000000
 
+extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
 extern unsigned long __pv_phys_offset;
 #define PHYS_OFFSET __pv_phys_offset
 
@@ -232,6 +233,21 @@ static inline void *phys_to_virt(phys_addr_t x)
 #define __va(x)			((void *)__phys_to_virt((unsigned long)(x)))
 #define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
 
+/*
+ * These are for systems that have a hardware interconnect supported alias of
+ * physical memory for idmap purposes.  Most cases should leave these
+ * untouched.
+ */
+static inline phys_addr_t __virt_to_idmap(unsigned long x)
+{
+	if (arch_virt_to_idmap)
+		return arch_virt_to_idmap(x);
+	else
+		return __virt_to_phys(x);
+}
+
+#define virt_to_idmap(x)	__virt_to_idmap((unsigned long)(x))
+
 /*
  * Virtual <-> DMA view memory address translations
  * Again, these are *only* valid on the kernel direct mapped RAM
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb451bb9..5a8ad2c8eda0 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -87,8 +87,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * its stack and the page tables.
 	 */
 	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
-	secondary_data.pgdir = virt_to_phys(idmap_pgd);
-	secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+	secondary_data.pgdir = virt_to_idmap(idmap_pgd);
+	secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir);
 	__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
 
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 83cb3ac27095..c0a1e48f6733 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -10,6 +10,7 @@
 #include <asm/system_info.h>
 
 pgd_t *idmap_pgd;
+phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
 
 #ifdef CONFIG_ARM_LPAE
 static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
@@ -67,8 +68,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,
 	unsigned long addr, end;
 	unsigned long next;
 
-	addr = virt_to_phys(text_start);
-	end = virt_to_phys(text_end);
+	addr = virt_to_idmap(text_start);
+	end = virt_to_idmap(text_end);
 
 	prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
 

From c4673ca7850dae7146ecc35c69e8f17f2337c7d2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 12 Sep 2014 22:17:23 +0200
Subject: [PATCH 285/287] mm: export symbol dependencies of is_zero_pfn()

In order to make the static inline function is_zero_pfn() callable by
modules, export its symbol dependencies 'zero_pfn' and (for s390 and
mips) 'zero_page_mask'.

We need this for KVM, as CONFIG_KVM is a tristate for all supported
architectures except ARM and arm64, and testing a pfn whether it refers
to the zero page is required to correctly distinguish the zero page
from other special RAM ranges that may also have the PG_reserved bit
set, but need to be treated as MMIO memory.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0b70068e47e8f0c813a902dc3d6def601fd15acb)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/mips/mm/init.c | 1 +
 arch/s390/mm/init.c | 1 +
 mm/memory.c         | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9b973e0af9cb..d340d53c345b 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -74,6 +74,7 @@
  */
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
 
 /*
  * Not static inline because used by IP27 special magic initialization code
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index eba15f18fd38..a4dfc0bd05db 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
 
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
 
 static void __init setup_zero_pages(void)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 5a35443c01ad..20bb9e901781 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps);
 unsigned long zero_pfn __read_mostly;
 unsigned long highest_memmap_pfn __read_mostly;
 
+EXPORT_SYMBOL(zero_pfn);
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */

From 331a9a729183832840d656036c5bc2b81942c294 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 12 Sep 2014 15:16:00 +0200
Subject: [PATCH 286/287] KVM: check for !is_zero_pfn() in kvm_is_mmio_pfn()

Read-only memory ranges may be backed by the zero page, so avoid
misidentifying it a a MMIO pfn.

This fixes another issue I identified when testing QEMU+KVM_UEFI, where
a read to an uninitialized emulated NOR flash brought in the zero page,
but mapped as a read-write device region, because kvm_is_mmio_pfn()
misidentifies it as a MMIO pfn due to its PG_reserved bit being set.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: b88657674d39 ("ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 85c8555ff07ef09261bd50d603cd4290cff5a8cc)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f019669674a5..b64d44219f27 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -110,7 +110,7 @@ static bool largepages_enabled = true;
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
 	if (pfn_valid(pfn))
-		return PageReserved(pfn_to_page(pfn));
+		return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
 
 	return true;
 }

From b8a669d29702a8fb529f4fae450a86b8676b0e42 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sam.bobroff@au1.ibm.com>
Date: Fri, 19 Sep 2014 09:40:41 +1000
Subject: [PATCH 287/287] KVM: correct null pid check in kvm_vcpu_yield_to()

Correct a simple mistake of checking the wrong variable
before a dereference, resulting in the dereference not being
properly protected by rcu_dereference().

Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 27fbe64bfa63cfb9da025975b59d96568caa2d53)
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b64d44219f27..9cae94206f41 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1728,7 +1728,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
 	if (pid)
-		task = get_pid_task(target->pid, PIDTYPE_PID);
+		task = get_pid_task(pid, PIDTYPE_PID);
 	rcu_read_unlock();
 	if (!task)
 		return ret;