From 9785df3fd67083ac10f6bde83a316286044a66f1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 7 Apr 2026 14:45:22 +0800 Subject: [PATCH 1/4] iommu/vt-d: Simplify calculate_psi_aligned_address() This is doing far too much math for the simple task of finding a power of 2 that fully spans the given range. Use fls directly on the xor which computes the common binary prefix. Signed-off-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/0-v2-895748900b39+5303-iommupt_inv_vtd_jgg@nvidia.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/cache.c | 49 ++++++++++++++----------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index be8410f0e841..fdc88817709f 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -254,37 +254,29 @@ void cache_tag_unassign_domain(struct dmar_domain *domain, } static unsigned long calculate_psi_aligned_address(unsigned long start, - unsigned long end, - unsigned long *_mask) + unsigned long last, + unsigned long *size_order) { - unsigned long pages = aligned_nrpages(start, end - start + 1); - unsigned long aligned_pages = __roundup_pow_of_two(pages); - unsigned long bitmask = aligned_pages - 1; - unsigned long mask = ilog2(aligned_pages); - unsigned long pfn = IOVA_PFN(start); - - /* - * PSI masks the low order bits of the base address. If the - * address isn't aligned to the mask, then compute a mask value - * needed to ensure the target range is flushed. - */ - if (unlikely(bitmask & pfn)) { - unsigned long end_pfn = pfn + pages - 1, shared_bits; + unsigned int sz_lg2; + /* Compute a sz_lg2 that spans start and last */ + start &= GENMASK(BITS_PER_LONG - 1, VTD_PAGE_SHIFT); + sz_lg2 = fls_long(start ^ last); + if (sz_lg2 <= 12) { + *size_order = 0; + return start; + } + if (unlikely(sz_lg2 >= BITS_PER_LONG)) { /* - * Since end_pfn <= pfn + bitmask, the only way bits - * higher than bitmask can differ in pfn and end_pfn is - * by carrying. This means after masking out bitmask, - * high bits starting with the first set bit in - * shared_bits are all equal in both pfn and end_pfn. + * MAX_AGAW_PFN_WIDTH triggers full invalidation in all + * downstream users. */ - shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; + *size_order = MAX_AGAW_PFN_WIDTH; + return 0; } - *_mask = mask; - - return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask); + *size_order = sz_lg2 - VTD_PAGE_SHIFT; + return start & GENMASK(BITS_PER_LONG - 1, sz_lg2); } static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch) @@ -441,12 +433,7 @@ void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start, struct cache_tag *tag; unsigned long flags; - if (start == 0 && end == ULONG_MAX) { - addr = 0; - mask = MAX_AGAW_PFN_WIDTH; - } else { - addr = calculate_psi_aligned_address(start, end, &mask); - } + addr = calculate_psi_aligned_address(start, end, &mask); spin_lock_irqsave(&domain->cache_lock, flags); list_for_each_entry(tag, &domain->cache_tags, node) { From 4c9ad387aa2d6785299722e54224d34764edaeb3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2026 16:53:54 +0200 Subject: [PATCH 2/4] iommu, debugobjects: avoid gcc-16.1 section mismatch warnings gcc-16 has gained some more advanced inter-procedual optimization techniques that enable it to inline the dummy_tlb_add_page() and dummy_tlb_flush() function pointers into a specialized version of __arm_v7s_unmap: WARNING: modpost: vmlinux: section mismatch in reference: __arm_v7s_unmap+0x2cc (section: .text) -> dummy_tlb_add_page (section: .init.text) ERROR: modpost: Section mismatches detected. >From what I can tell, the transformation is correct, as this is only called when __arm_v7s_unmap() is called from arm_v7s_do_selftests(), which is also __init. Since __arm_v7s_unmap() however is not __init, gcc cannot inline the inner function calls directly. In debug_objects_selftest(), the same thing happens. Both the caller and the leaf function are __init, but the IPA pulls it into a non-init one: WARNING: modpost: vmlinux: section mismatch in reference: lookup_object_or_alloc+0x7c (section: .text.lookup_object_or_alloc) -> is_static_object (section: .init.text) Marking the affected functions as not "__init" would reliably avoid this issue but is not a good solution because it removes an otherwise correct annotation. I tried marking the functions as 'noinline', but that ended up not covering all the affected configurations. With some more experimenting, I found that marking these functions as __attribute__((noipa)) is both logical and reliable. In order to keep the syntax readable, add a custom macro for this in include/linux/compiler_attributes.h next to other related macros and use it to annotate both files. Link: https://lore.kernel.org/all/abRB6g-48ZX6Yl2r@willie-the-truck/ Cc: Will Deacon Cc: Thomas Gleixner Cc: Andrew Morton Cc: Miguel Ojeda Cc: linux-kbuild@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Acked-by: Will Deacon Acked-by: Thomas Gleixner Acked-by: Miguel Ojeda Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 18 ++++++++++++------ include/linux/compiler_attributes.h | 11 +++++++++++ lib/debugobjects.c | 2 +- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 40e33257d3c2..1dbef8c55007 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -777,21 +777,27 @@ struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = { static struct io_pgtable_cfg *cfg_cookie __initdata; -static void __init dummy_tlb_flush_all(void *cookie) +/* + * __noipa prevents gcc from turning indirect iommu_flush_ops calls + * into direct calls from a specialized __arm_v7s_unmap() that triggers + * a build time section mismatch assertion. + */ +static __noipa void __init dummy_tlb_flush_all(void *cookie) { WARN_ON(cookie != cfg_cookie); } -static void __init dummy_tlb_flush(unsigned long iova, size_t size, - size_t granule, void *cookie) +static __noipa void __init dummy_tlb_flush(unsigned long iova, size_t size, + size_t granule, void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather, - unsigned long iova, size_t granule, - void *cookie) +static __noipa void __init dummy_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, + size_t granule, + void *cookie) { dummy_tlb_flush(iova, granule, granule, cookie); } diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index c16d4199bf92..836a50f5917a 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -396,6 +396,17 @@ # define __disable_sanitizer_instrumentation #endif +/* + * Optional: not supported by clang + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Attributes.html#index-noipa + */ +#if __has_attribute(noipa) +# define __noipa __attribute__((noipa)) +#else +# define __noipa +#endif + /* * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 12e2e42e6a31..c93b7ca3e1ab 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -1212,7 +1212,7 @@ struct self_test { static __initconst const struct debug_obj_descr descr_type_test; -static bool __init is_static_object(void *addr) +static __noipa bool __init is_static_object(void *addr) { struct self_test *obj = addr; From 2e4eed207cb4ba513e4a1fdcecbb3732e98f4914 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2026 09:53:17 +0200 Subject: [PATCH 3/4] MAINTAINERS: Add Vasant Hegde to reviewers of AMD IOMMU Vasant has a long history of providing valuable feedback and testing results for the AMD IOMMU code. Still, too often he gets not Cc'ed on code changes, so make his reviewer status official. Acked-by: Vasant Hegde Signed-off-by: Joerg Roedel --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c2c6d79275c6..34293e53129f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1152,6 +1152,7 @@ F: drivers/platform/x86/amd/hfi/ AMD IOMMU (AMD-VI) M: Joerg Roedel R: Suravee Suthikulpanit +R: Vasant Hegde L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git From 00c257948900fae69dae2a055b378edf09aacf6e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 May 2026 09:53:18 +0200 Subject: [PATCH 4/4] MAINTAINERS: Add my employer to my entries AMD pays for my IOMMU maintainer work, so mention that in the MAINTAINERS file as well. Signed-off-by: Joerg Roedel --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 34293e53129f..52e538030cf9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1150,7 +1150,7 @@ F: Documentation/arch/x86/amd-hfi.rst F: drivers/platform/x86/amd/hfi/ AMD IOMMU (AMD-VI) -M: Joerg Roedel +M: Joerg Roedel (AMD) R: Suravee Suthikulpanit R: Vasant Hegde L: iommu@lists.linux.dev @@ -13482,7 +13482,7 @@ F: include/linux/iommu-dma.h F: include/linux/iova.h IOMMU SUBSYSTEM -M: Joerg Roedel +M: Joerg Roedel (AMD) M: Will Deacon R: Robin Murphy L: iommu@lists.linux.dev