mirror of
https://github.com/torvalds/linux.git
synced 2026-05-12 16:18:45 +02:00
mm/memory: convert print_bad_pte() to print_bad_page_map()
print_bad_pte() looks like something that should actually be a WARN or similar, but historically it apparently has proven to be useful to detect corruption of page tables even on production systems -- report the issue and keep the system running to make it easier to actually detect what is going wrong (e.g., multiple such messages might shed light).

As we want to unify vm_normal_page_*() handling for PTE/PMD/PUD, we'll have to take care of print_bad_pte() as well.

Let's prepare for using print_bad_pte() also for non-PTEs by adjusting the implementation and renaming the function to print_bad_page_map(). Provide print_bad_pte() as a simple wrapper.

Document the implicit locking requirements for the page table re-walk.

To make the function a bit more readable, factor out the ratelimit check into is_bad_page_map_ratelimited() and place the printing of page table content into __print_bad_page_map_pgtable(). We'll now dump information from each level in a single line, and just stop the table walk once we hit something that is not a present page table.

The report will now look something like (dumping pgd to pmd values):

[ 77.943408] BUG: Bad page map in process XXX pte:80000001233f5867
[ 77.944077] addr:00007fd84bb1c000 vm_flags:08100071 anon_vma: ...
[ 77.945186] pgd:10a89f067 p4d:10a89f067 pud:10e5a2067 pmd:105327067

Not using pgdp_get(), because that does not work properly on some arm configs where pgd_t is an array. Note that, for simplicity, we are dumping all levels even when levels are folded.
[david@redhat.com: drop warning]
Link: https://lkml.kernel.org/r/923b279c-de33-44dd-a923-2959afad8626@redhat.com
Link: https://lkml.kernel.org/r/20250811112631.759341-9-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
b22cc9a9c7
commit
ec63a44011
|
|
@ -1983,6 +1983,24 @@ enum pgtable_level {
|
|||
PGTABLE_LEVEL_PGD,
|
||||
};
|
||||
|
||||
static inline const char *pgtable_level_to_str(enum pgtable_level level)
|
||||
{
|
||||
switch (level) {
|
||||
case PGTABLE_LEVEL_PTE:
|
||||
return "pte";
|
||||
case PGTABLE_LEVEL_PMD:
|
||||
return "pmd";
|
||||
case PGTABLE_LEVEL_PUD:
|
||||
return "pud";
|
||||
case PGTABLE_LEVEL_P4D:
|
||||
return "p4d";
|
||||
case PGTABLE_LEVEL_PGD:
|
||||
return "pgd";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#if !defined(MAX_POSSIBLE_PHYSMEM_BITS) && !defined(CONFIG_64BIT)
|
||||
|
|
|
|||
104
mm/memory.c
104
mm/memory.c
|
|
@ -491,22 +491,8 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
|
|||
add_mm_counter(mm, i, rss[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called to print an error when a bad pte
|
||||
* is found. For example, we might have a PFN-mapped pte in
|
||||
* a region that doesn't allow it.
|
||||
*
|
||||
* The calling function must still handle the error.
|
||||
*/
|
||||
static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
|
||||
pte_t pte, struct page *page)
|
||||
static bool is_bad_page_map_ratelimited(void)
|
||||
{
|
||||
pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
|
||||
p4d_t *p4d = p4d_offset(pgd, addr);
|
||||
pud_t *pud = pud_offset(p4d, addr);
|
||||
pmd_t *pmd = pmd_offset(pud, addr);
|
||||
struct address_space *mapping;
|
||||
pgoff_t index;
|
||||
static unsigned long resume;
|
||||
static unsigned long nr_shown;
|
||||
static unsigned long nr_unshown;
|
||||
|
|
@ -518,7 +504,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
|
|||
if (nr_shown == 60) {
|
||||
if (time_before(jiffies, resume)) {
|
||||
nr_unshown++;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
if (nr_unshown) {
|
||||
pr_alert("BUG: Bad page map: %lu messages suppressed\n",
|
||||
|
|
@ -529,15 +515,91 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
|
|||
}
|
||||
if (nr_shown++ == 0)
|
||||
resume = jiffies + 60 * HZ;
|
||||
return false;
|
||||
}
|
||||
|
||||
static void __print_bad_page_map_pgtable(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
unsigned long long pgdv, p4dv, pudv, pmdv;
|
||||
p4d_t p4d, *p4dp;
|
||||
pud_t pud, *pudp;
|
||||
pmd_t pmd, *pmdp;
|
||||
pgd_t *pgdp;
|
||||
|
||||
/*
|
||||
* Although this looks like a fully lockless pgtable walk, it is not:
|
||||
* see locking requirements for print_bad_page_map().
|
||||
*/
|
||||
pgdp = pgd_offset(mm, addr);
|
||||
pgdv = pgd_val(*pgdp);
|
||||
|
||||
if (!pgd_present(*pgdp) || pgd_leaf(*pgdp)) {
|
||||
pr_alert("pgd:%08llx\n", pgdv);
|
||||
return;
|
||||
}
|
||||
|
||||
p4dp = p4d_offset(pgdp, addr);
|
||||
p4d = p4dp_get(p4dp);
|
||||
p4dv = p4d_val(p4d);
|
||||
|
||||
if (!p4d_present(p4d) || p4d_leaf(p4d)) {
|
||||
pr_alert("pgd:%08llx p4d:%08llx\n", pgdv, p4dv);
|
||||
return;
|
||||
}
|
||||
|
||||
pudp = pud_offset(p4dp, addr);
|
||||
pud = pudp_get(pudp);
|
||||
pudv = pud_val(pud);
|
||||
|
||||
if (!pud_present(pud) || pud_leaf(pud)) {
|
||||
pr_alert("pgd:%08llx p4d:%08llx pud:%08llx\n", pgdv, p4dv, pudv);
|
||||
return;
|
||||
}
|
||||
|
||||
pmdp = pmd_offset(pudp, addr);
|
||||
pmd = pmdp_get(pmdp);
|
||||
pmdv = pmd_val(pmd);
|
||||
|
||||
/*
|
||||
* Dumping the PTE would be nice, but it's tricky with CONFIG_HIGHPTE,
|
||||
* because the table should already be mapped by the caller and
|
||||
* doing another map would be bad. print_bad_page_map() should
|
||||
* already take care of printing the PTE.
|
||||
*/
|
||||
pr_alert("pgd:%08llx p4d:%08llx pud:%08llx pmd:%08llx\n", pgdv,
|
||||
p4dv, pudv, pmdv);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called to print an error when a bad page table entry (e.g.,
|
||||
* corrupted page table entry) is found. For example, we might have a
|
||||
* PFN-mapped pte in a region that doesn't allow it.
|
||||
*
|
||||
* The calling function must still handle the error.
|
||||
*
|
||||
* This function must be called during a proper page table walk, as it will
|
||||
* re-walk the page table to dump information: the caller MUST prevent page
|
||||
* table teardown (by holding mmap, vma or rmap lock) and MUST hold the leaf
|
||||
* page table lock.
|
||||
*/
|
||||
static void print_bad_page_map(struct vm_area_struct *vma,
|
||||
unsigned long addr, unsigned long long entry, struct page *page,
|
||||
enum pgtable_level level)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
pgoff_t index;
|
||||
|
||||
if (is_bad_page_map_ratelimited())
|
||||
return;
|
||||
|
||||
mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
|
||||
index = linear_page_index(vma, addr);
|
||||
|
||||
pr_alert("BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
|
||||
current->comm,
|
||||
(long long)pte_val(pte), (long long)pmd_val(*pmd));
|
||||
pr_alert("BUG: Bad page map in process %s %s:%08llx", current->comm,
|
||||
pgtable_level_to_str(level), entry);
|
||||
__print_bad_page_map_pgtable(vma->vm_mm, addr);
|
||||
if (page)
|
||||
dump_page(page, "bad pte");
|
||||
dump_page(page, "bad page map");
|
||||
pr_alert("addr:%px vm_flags:%08lx anon_vma:%px mapping:%px index:%lx\n",
|
||||
(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
|
||||
pr_alert("file:%pD fault:%ps mmap:%ps mmap_prepare: %ps read_folio:%ps\n",
|
||||
|
|
@ -549,6 +611,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
|
|||
dump_stack();
|
||||
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
|
||||
}
|
||||
/*
 * PTE-level convenience wrapper: forwards to print_bad_page_map() with the
 * raw value of @pte and PGTABLE_LEVEL_PTE as the level.
 */
#define print_bad_pte(vma, addr, pte, page)				\
	print_bad_page_map(vma, addr, pte_val(pte), page,		\
			   PGTABLE_LEVEL_PTE)
|
||||
|
||||
/*
|
||||
* vm_normal_page -- This function gets the "struct page" associated with a pte.
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user