From 29caeda359da15d16963096043cda39530f81cc4 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Mon, 9 Sep 2024 12:47:17 +0000 Subject: [PATCH 1/5] KVM: arm64: Move pagetable definitions to common header In preparation for using the stage-2 definitions in ptdump, move some of these macros in the common header. Signed-off-by: Sebastian Ene Link: https://lore.kernel.org/r/20240909124721.1672199-2-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_pgtable.h | 42 ++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/pgtable.c | 42 ---------------------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 19278dfe7978..03f4c3d7839c 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -59,6 +59,48 @@ typedef u64 kvm_pte_t; #define KVM_PHYS_INVALID (-1ULL) +#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) + +#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \ + ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; }) +#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \ + ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; }) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) +#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) +#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 +#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) + +#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) + +#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) + +#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) + +#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) + +#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) + +#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ + KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ + KVM_PTE_LEAF_ATTR_HI_S2_XN) + +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) +#define KVM_MAX_OWNER_ID 1 + +/* + * Used to indicate a pte for which a 'break-before-make' sequence is in + * progress. + */ +#define KVM_INVALID_PTE_LOCKED BIT(10) + static inline bool kvm_pte_valid(kvm_pte_t pte) { return pte & KVM_PTE_VALID; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 9e2bbee77491..c3e9d77bba23 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -17,48 +17,6 @@ #define KVM_PTE_TYPE_PAGE 1 #define KVM_PTE_TYPE_TABLE 1 -#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) - -#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) -#define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) -#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \ - ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; }) -#define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \ - ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; }) -#define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) -#define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 -#define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) - -#define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) -#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) -#define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) -#define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) -#define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 -#define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) - -#define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) - -#define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) - -#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) - -#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) - -#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) - -#define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ - KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ - KVM_PTE_LEAF_ATTR_HI_S2_XN) - -#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) -#define KVM_MAX_OWNER_ID 1 - -/* - * Used to indicate a pte for which a 'break-before-make' sequence is in - * progress. - */ -#define KVM_INVALID_PTE_LOCKED BIT(10) - struct kvm_pgtable_walk_data { struct kvm_pgtable_walker *walker; From acc3d3a8176651a839056c7da4b925ea0bcc38c2 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Mon, 9 Sep 2024 12:47:18 +0000 Subject: [PATCH 2/5] arm64: ptdump: Expose the attribute parsing functionality Reuse the descriptor parsing functionality to keep the same output format as the original ptdump code. In order for this to happen, move the state tracking objects into a common header. [maz: Fixed note_page() stub as suggested by Will] Signed-off-by: Sebastian Ene Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240909124721.1672199-3-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/ptdump.h | 42 ++++++++++++++++++++++++- arch/arm64/mm/ptdump.c | 55 +++++++-------------------------- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 5b1701c76d1c..aa46b5d2cf85 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -5,6 +5,8 @@ #ifndef __ASM_PTDUMP_H #define __ASM_PTDUMP_H +#include + #ifdef CONFIG_PTDUMP_CORE #include @@ -21,14 +23,52 @@ struct ptdump_info { unsigned long base_addr; }; +struct ptdump_prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +struct ptdump_pg_level { + const struct ptdump_prot_bits *bits; + char name[4]; + int num; + u64 mask; +}; + +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. + */ +struct ptdump_pg_state { + struct ptdump_state ptdump; + struct seq_file *seq; + const struct addr_marker *marker; + const struct mm_struct *mm; + unsigned long start_address; + int level; + u64 current_prot; + bool check_wx; + unsigned long wx_pages; + unsigned long uxn_pages; +}; + void ptdump_walk(struct seq_file *s, struct ptdump_info *info); +void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + u64 val); #ifdef CONFIG_PTDUMP_DEBUGFS #define EFI_RUNTIME_MAP_END DEFAULT_MAP_WINDOW_64 void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } -#endif +#endif /* CONFIG_PTDUMP_DEBUGFS */ +#else +static inline void note_page(struct ptdump_state *pt_st, unsigned long addr, + int level, u64 val) { } #endif /* CONFIG_PTDUMP_CORE */ #endif /* __ASM_PTDUMP_H */ diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 6986827e0d64..404751fd30fe 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -38,33 +38,7 @@ seq_printf(m, fmt); \ }) -/* - * The page dumper groups page table entries of the same type into a single - * description. It uses pg_state to track the range information while - * iterating over the pte entries. When the continuity is broken it then - * dumps out a description of the range. - */ -struct pg_state { - struct ptdump_state ptdump; - struct seq_file *seq; - const struct addr_marker *marker; - const struct mm_struct *mm; - unsigned long start_address; - int level; - u64 current_prot; - bool check_wx; - unsigned long wx_pages; - unsigned long uxn_pages; -}; - -struct prot_bits { - u64 mask; - u64 val; - const char *set; - const char *clear; -}; - -static const struct prot_bits pte_bits[] = { +static const struct ptdump_prot_bits pte_bits[] = { { .mask = PTE_VALID, .val = PTE_VALID, @@ -143,14 +117,7 @@ static const struct prot_bits pte_bits[] = { } }; -struct pg_level { - const struct prot_bits *bits; - char name[4]; - int num; - u64 mask; -}; - -static struct pg_level pg_level[] __ro_after_init = { +static struct ptdump_pg_level pg_level[] __ro_after_init = { { /* pgd */ .name = "PGD", .bits = pte_bits, @@ -174,7 +141,7 @@ static struct pg_level pg_level[] __ro_after_init = { }, }; -static void dump_prot(struct pg_state *st, const struct prot_bits *bits, +static void dump_prot(struct ptdump_pg_state *st, const struct ptdump_prot_bits *bits, size_t num) { unsigned i; @@ -192,7 +159,7 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, } } -static void note_prot_uxn(struct pg_state *st, unsigned long addr) +static void note_prot_uxn(struct ptdump_pg_state *st, unsigned long addr) { if (!st->check_wx) return; @@ -206,7 +173,7 @@ static void note_prot_uxn(struct pg_state *st, unsigned long addr) st->uxn_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_prot_wx(struct pg_state *st, unsigned long addr) +static void note_prot_wx(struct ptdump_pg_state *st, unsigned long addr) { if (!st->check_wx) return; @@ -221,10 +188,10 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - u64 val) +void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + u64 val) { - struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump); static const char units[] = "KMGTPE"; u64 prot = 0; @@ -286,12 +253,12 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, void ptdump_walk(struct seq_file *s, struct ptdump_info *info) { unsigned long end = ~0UL; - struct pg_state st; + struct ptdump_pg_state st; if (info->base_addr < TASK_SIZE_64) end = TASK_SIZE_64; - st = (struct pg_state){ + st = (struct ptdump_pg_state){ .seq = s, .marker = info->markers, .mm = info->mm, @@ -324,7 +291,7 @@ static struct ptdump_info kernel_ptdump_info __ro_after_init = { bool ptdump_check_wx(void) { - struct pg_state st = { + struct ptdump_pg_state st = { .seq = NULL, .marker = (struct addr_marker[]) { { 0, NULL}, From 9182301a7bd2564fb050ade9820333c8b1adfcc2 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Mon, 9 Sep 2024 12:47:19 +0000 Subject: [PATCH 3/5] arm64: ptdump: Use the ptdump description from a local context Rename the attributes description array to allow the parsing method to use the description from a local context. To be able to do this, store a pointer to the description array in the state structure. This will allow for the later introduced callers (stage_2 ptdump) to specify their own page table description format to the ptdump parser. Signed-off-by: Sebastian Ene Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240909124721.1672199-4-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/ptdump.h | 1 + arch/arm64/mm/ptdump.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index aa46b5d2cf85..6cf4aae05219 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -45,6 +45,7 @@ struct ptdump_pg_level { */ struct ptdump_pg_state { struct ptdump_state ptdump; + struct ptdump_pg_level *pg_level; struct seq_file *seq; const struct addr_marker *marker; const struct mm_struct *mm; diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 404751fd30fe..ca53ef274a8b 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -117,7 +117,7 @@ static const struct ptdump_prot_bits pte_bits[] = { } }; -static struct ptdump_pg_level pg_level[] __ro_after_init = { +static struct ptdump_pg_level kernel_pg_levels[] __ro_after_init = { { /* pgd */ .name = "PGD", .bits = pte_bits, @@ -192,6 +192,7 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { struct ptdump_pg_state *st = container_of(pt_st, struct ptdump_pg_state, ptdump); + struct ptdump_pg_level *pg_level = st->pg_level; static const char units[] = "KMGTPE"; u64 prot = 0; @@ -262,6 +263,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info) .seq = s, .marker = info->markers, .mm = info->mm, + .pg_level = &kernel_pg_levels[0], .level = -1, .ptdump = { .note_page = note_page, @@ -279,10 +281,10 @@ static void __init ptdump_initialize(void) { unsigned i, j; - for (i = 0; i < ARRAY_SIZE(pg_level); i++) - if (pg_level[i].bits) - for (j = 0; j < pg_level[i].num; j++) - pg_level[i].mask |= pg_level[i].bits[j].mask; + for (i = 0; i < ARRAY_SIZE(kernel_pg_levels); i++) + if (kernel_pg_levels[i].bits) + for (j = 0; j < kernel_pg_levels[i].num; j++) + kernel_pg_levels[i].mask |= kernel_pg_levels[i].bits[j].mask; } static struct ptdump_info kernel_ptdump_info __ro_after_init = { @@ -297,6 +299,7 @@ bool ptdump_check_wx(void) { 0, NULL}, { -1, NULL}, }, + .pg_level = &kernel_pg_levels[0], .level = -1, .check_wx = true, .ptdump = { From 79c4c7284f92d5e780c8532c343ca2cacfaf5125 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Mon, 9 Sep 2024 12:47:20 +0000 Subject: [PATCH 4/5] arm64: ptdump: Don't override the level when operating on the stage-2 tables Ptdump uses the init_mm structure directly to dump the kernel pagetables. When ptdump is called on the stage-2 pagetables, this mm argument is not used. Prevent the level from being overwritten by checking the argument against NULL. Signed-off-by: Sebastian Ene Acked-by: Will Deacon Link: https://lore.kernel.org/r/20240909124721.1672199-5-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/mm/ptdump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index ca53ef274a8b..264c5f9b97d8 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -197,8 +197,8 @@ void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 prot = 0; /* check if the current level has been folded dynamically */ - if ((level == 1 && mm_p4d_folded(st->mm)) || - (level == 2 && mm_pud_folded(st->mm))) + if (st->mm && ((level == 1 && mm_p4d_folded(st->mm)) || + (level == 2 && mm_pud_folded(st->mm)))) level = 0; if (level >= 0) From 7c4f73548ed15476daf1101f66648085eda65067 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Mon, 9 Sep 2024 12:47:21 +0000 Subject: [PATCH 5/5] KVM: arm64: Register ptdump with debugfs on guest creation While arch/*/mem/ptdump handles the kernel pagetable dumping code, introduce KVM/ptdump to show the guest stage-2 pagetables. The separation is necessary because most of the definitions from the stage-2 pagetable reside in the KVM path and we will be invoking functionality specific to KVM. Introduce the PTDUMP_STAGE2_DEBUGFS config. When a guest is created, register a new file entry under the guest debugfs dir which allows userspace to show the contents of the guest stage-2 pagetables when accessed. [maz: moved function prototypes from kvm_host.h to kvm_mmu.h] Signed-off-by: Sebastian Ene Reviewed-by: Vincent Donnefort Link: https://lore.kernel.org/r/20240909124721.1672199-6-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_mmu.h | 6 + arch/arm64/kvm/Kconfig | 17 ++ arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/ptdump.c | 268 +++++++++++++++++++++++++++++++ 5 files changed, 293 insertions(+) create mode 100644 arch/arm64/kvm/ptdump.c diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 216ca424bb16..cd4087fbda9a 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -352,5 +352,11 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) return &kvm->arch.mmu != mmu; } +#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm); +#else +static inline void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) {} +#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */ + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 58f09370d17e..60a0b72df7b9 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -65,4 +65,21 @@ config PROTECTED_NVHE_STACKTRACE If unsure, or not using protected nVHE (pKVM), say N. +config PTDUMP_STAGE2_DEBUGFS + bool "Present the stage-2 pagetables to debugfs" + depends on KVM + depends on DEBUG_KERNEL + depends on DEBUG_FS + depends on GENERIC_PTDUMP + select PTDUMP_CORE + default n + help + Say Y here if you want to show the stage-2 kernel pagetables + layout in a debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. + It is probably not a good idea to enable this feature in a production + kernel. + + If in doubt, say N. + endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a6497228c5a8..f6ef4140b20a 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -24,6 +24,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o +kvm-$(CONFIG_PTDUMP_STAGE2_DEBUGFS) += ptdump.o always-y := hyp_constants.h hyp-constants.s diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a7ca776b51ec..32bdb116d5be 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -227,6 +227,7 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) void kvm_arch_create_vm_debugfs(struct kvm *kvm) { kvm_sys_regs_create_debugfs(kvm); + kvm_s2_ptdump_create_debugfs(kvm); } static void kvm_destroy_mpidr_data(struct kvm *kvm) diff --git a/arch/arm64/kvm/ptdump.c b/arch/arm64/kvm/ptdump.c new file mode 100644 index 000000000000..e4a342e903e2 --- /dev/null +++ b/arch/arm64/kvm/ptdump.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Debug helper used to dump the stage-2 pagetables of the system and their + * associated permissions. + * + * Copyright (C) Google, 2024 + * Author: Sebastian Ene + */ +#include +#include +#include + +#include +#include +#include + +#define MARKERS_LEN 2 +#define KVM_PGTABLE_MAX_LEVELS (KVM_PGTABLE_LAST_LEVEL + 1) + +struct kvm_ptdump_guest_state { + struct kvm *kvm; + struct ptdump_pg_state parser_state; + struct addr_marker ipa_marker[MARKERS_LEN]; + struct ptdump_pg_level level[KVM_PGTABLE_MAX_LEVELS]; + struct ptdump_range range[MARKERS_LEN]; +}; + +static const struct ptdump_prot_bits stage2_pte_bits[] = { + { + .mask = PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "F", + }, { + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID, + .set = "R", + .clear = " ", + }, { + .mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, + .val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID, + .set = "W", + .clear = " ", + }, { + .mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID, + .val = PTE_VALID, + .set = " ", + .clear = "X", + }, { + .mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, + .val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID, + .set = "AF", + .clear = " ", + }, { + .mask = PTE_TABLE_BIT | PTE_VALID, + .val = PTE_VALID, + .set = "BLK", + .clear = " ", + }, +}; + +static int kvm_ptdump_visitor(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) +{ + struct ptdump_pg_state *st = ctx->arg; + struct ptdump_state *pt_st = &st->ptdump; + + note_page(pt_st, ctx->addr, ctx->level, ctx->old); + + return 0; +} + +static int kvm_ptdump_build_levels(struct ptdump_pg_level *level, u32 start_lvl) +{ + u32 i; + u64 mask; + + if (WARN_ON_ONCE(start_lvl >= KVM_PGTABLE_LAST_LEVEL)) + return -EINVAL; + + mask = 0; + for (i = 0; i < ARRAY_SIZE(stage2_pte_bits); i++) + mask |= stage2_pte_bits[i].mask; + + for (i = start_lvl; i < KVM_PGTABLE_MAX_LEVELS; i++) { + snprintf(level[i].name, sizeof(level[i].name), "%u", i); + + level[i].num = ARRAY_SIZE(stage2_pte_bits); + level[i].bits = stage2_pte_bits; + level[i].mask = mask; + } + + return 0; +} + +static struct kvm_ptdump_guest_state *kvm_ptdump_parser_create(struct kvm *kvm) +{ + struct kvm_ptdump_guest_state *st; + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; + struct kvm_pgtable *pgtable = mmu->pgt; + int ret; + + st = kzalloc(sizeof(struct kvm_ptdump_guest_state), GFP_KERNEL_ACCOUNT); + if (!st) + return ERR_PTR(-ENOMEM); + + ret = kvm_ptdump_build_levels(&st->level[0], pgtable->start_level); + if (ret) { + kfree(st); + return ERR_PTR(ret); + } + + st->ipa_marker[0].name = "Guest IPA"; + st->ipa_marker[1].start_address = BIT(pgtable->ia_bits); + st->range[0].end = BIT(pgtable->ia_bits); + + st->kvm = kvm; + st->parser_state = (struct ptdump_pg_state) { + .marker = &st->ipa_marker[0], + .level = -1, + .pg_level = &st->level[0], + .ptdump.range = &st->range[0], + .start_address = 0, + }; + + return st; +} + +static int kvm_ptdump_guest_show(struct seq_file *m, void *unused) +{ + int ret; + struct kvm_ptdump_guest_state *st = m->private; + struct kvm *kvm = st->kvm; + struct kvm_s2_mmu *mmu = &kvm->arch.mmu; + struct ptdump_pg_state *parser_state = &st->parser_state; + struct kvm_pgtable_walker walker = (struct kvm_pgtable_walker) { + .cb = kvm_ptdump_visitor, + .arg = parser_state, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + parser_state->seq = m; + + write_lock(&kvm->mmu_lock); + ret = kvm_pgtable_walk(mmu->pgt, 0, BIT(mmu->pgt->ia_bits), &walker); + write_unlock(&kvm->mmu_lock); + + return ret; +} + +static int kvm_ptdump_guest_open(struct inode *m, struct file *file) +{ + struct kvm *kvm = m->i_private; + struct kvm_ptdump_guest_state *st; + int ret; + + if (!kvm_get_kvm_safe(kvm)) + return -ENOENT; + + st = kvm_ptdump_parser_create(kvm); + if (IS_ERR(st)) { + ret = PTR_ERR(st); + goto err_with_kvm_ref; + } + + ret = single_open(file, kvm_ptdump_guest_show, st); + if (!ret) + return 0; + + kfree(st); +err_with_kvm_ref: + kvm_put_kvm(kvm); + return ret; +} + +static int kvm_ptdump_guest_close(struct inode *m, struct file *file) +{ + struct kvm *kvm = m->i_private; + void *st = ((struct seq_file *)file->private_data)->private; + + kfree(st); + kvm_put_kvm(kvm); + + return single_release(m, file); +} + +static const struct file_operations kvm_ptdump_guest_fops = { + .open = kvm_ptdump_guest_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_ptdump_guest_close, +}; + +static int kvm_pgtable_range_show(struct seq_file *m, void *unused) +{ + struct kvm_pgtable *pgtable = m->private; + + seq_printf(m, "%2u\n", pgtable->ia_bits); + return 0; +} + +static int kvm_pgtable_levels_show(struct seq_file *m, void *unused) +{ + struct kvm_pgtable *pgtable = m->private; + + seq_printf(m, "%1d\n", KVM_PGTABLE_MAX_LEVELS - pgtable->start_level); + return 0; +} + +static int kvm_pgtable_debugfs_open(struct inode *m, struct file *file, + int (*show)(struct seq_file *, void *)) +{ + struct kvm *kvm = m->i_private; + struct kvm_pgtable *pgtable; + int ret; + + if (!kvm_get_kvm_safe(kvm)) + return -ENOENT; + + pgtable = kvm->arch.mmu.pgt; + + ret = single_open(file, show, pgtable); + if (ret < 0) + kvm_put_kvm(kvm); + return ret; +} + +static int kvm_pgtable_range_open(struct inode *m, struct file *file) +{ + return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_range_show); +} + +static int kvm_pgtable_levels_open(struct inode *m, struct file *file) +{ + return kvm_pgtable_debugfs_open(m, file, kvm_pgtable_levels_show); +} + +static int kvm_pgtable_debugfs_close(struct inode *m, struct file *file) +{ + struct kvm *kvm = m->i_private; + + kvm_put_kvm(kvm); + return single_release(m, file); +} + +static const struct file_operations kvm_pgtable_range_fops = { + .open = kvm_pgtable_range_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_pgtable_debugfs_close, +}; + +static const struct file_operations kvm_pgtable_levels_fops = { + .open = kvm_pgtable_levels_open, + .read = seq_read, + .llseek = seq_lseek, + .release = kvm_pgtable_debugfs_close, +}; + +void kvm_s2_ptdump_create_debugfs(struct kvm *kvm) +{ + debugfs_create_file("stage2_page_tables", 0400, kvm->debugfs_dentry, + kvm, &kvm_ptdump_guest_fops); + debugfs_create_file("ipa_range", 0400, kvm->debugfs_dentry, kvm, + &kvm_pgtable_range_fops); + debugfs_create_file("stage2_levels", 0400, kvm->debugfs_dentry, + kvm, &kvm_pgtable_levels_fops); +}