From 2a17a5bebc9aa7f59e99676350866adc41577c03 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 7 Dec 2024 11:09:27 +0100 Subject: [PATCH 01/19] powerpc/32: Replace mulhdu() by mul_u64_u64_shr() Using mul_u64_u64_shr() provides similar calculation as mulhdu() assembly function, but enables inlining by the compiler. The home-made assembly function had special handling for when one of the arguments is not a fully populated u64 but time functions use it to multiply timebase by a calculated scale which is constructed to have most significant bit set. On mpc8xx sched_clock() runs 3% faster. On mpc83xx it is 2%. As you can see below, sched_clock() is not much bigger than before: c000cf68 : c000cf68: 7d 2d 42 a6 mftbu r9 c000cf6c: 7d 0c 42 a6 mftb r8 c000cf70: 7d 4d 42 a6 mftbu r10 c000cf74: 7c 09 50 40 cmplw r9,r10 c000cf78: 40 82 ff f0 bne c000cf68 c000cf7c: 3d 40 c1 37 lis r10,-16073 c000cf80: 38 8a b3 30 addi r4,r10,-19664 c000cf84: 80 ea b3 30 lwz r7,-19664(r10) c000cf88: 80 64 00 14 lwz r3,20(r4) c000cf8c: 39 40 00 00 li r10,0 c000cf90: 80 a4 00 04 lwz r5,4(r4) c000cf94: 80 c4 00 10 lwz r6,16(r4) c000cf98: 7c 63 40 10 subfc r3,r3,r8 c000cf9c: 80 84 00 08 lwz r4,8(r4) c000cfa0: 7d 06 49 10 subfe r8,r6,r9 c000cfa4: 7c c7 19 d6 mullw r6,r7,r3 c000cfa8: 7d 25 18 16 mulhwu r9,r5,r3 c000cfac: 7c 08 29 d6 mullw r0,r8,r5 c000cfb0: 7c 67 18 16 mulhwu r3,r7,r3 c000cfb4: 7d 29 30 14 addc r9,r9,r6 c000cfb8: 7c a8 28 16 mulhwu r5,r8,r5 c000cfbc: 7c ca 51 14 adde r6,r10,r10 c000cfc0: 7d 67 41 d6 mullw r11,r7,r8 c000cfc4: 7d 29 00 14 addc r9,r9,r0 c000cfc8: 7c c6 01 94 addze r6,r6 c000cfcc: 7c 63 28 14 addc r3,r3,r5 c000cfd0: 7d 4a 51 14 adde r10,r10,r10 c000cfd4: 7c e7 40 16 mulhwu r7,r7,r8 c000cfd8: 7c 63 58 14 addc r3,r3,r11 c000cfdc: 7d 4a 01 94 addze r10,r10 c000cfe0: 7c 63 30 14 addc r3,r3,r6 c000cfe4: 7d 4a 39 14 adde r10,r10,r7 c000cfe8: 35 24 ff e0 addic. 
r9,r4,-32 c000cfec: 41 80 00 10 blt c000cffc c000cff0: 7c 63 48 30 slw r3,r3,r9 c000cff4: 38 80 00 00 li r4,0 c000cff8: 4e 80 00 20 blr c000cffc: 21 04 00 1f subfic r8,r4,31 c000d000: 54 69 f8 7e srwi r9,r3,1 c000d004: 7d 4a 20 30 slw r10,r10,r4 c000d008: 7d 29 44 30 srw r9,r9,r8 c000d00c: 7c 64 20 30 slw r4,r3,r4 c000d010: 7d 23 53 78 or r3,r9,r10 c000d014: 4e 80 00 20 blr Before this change: c000d0bc : c000d0bc: 94 21 ff f0 stwu r1,-16(r1) c000d0c0: 7c 08 02 a6 mflr r0 c000d0c4: 90 01 00 14 stw r0,20(r1) c000d0c8: 93 e1 00 0c stw r31,12(r1) c000d0cc: 7d 2d 42 a6 mftbu r9 c000d0d0: 7d 0c 42 a6 mftb r8 c000d0d4: 7d 4d 42 a6 mftbu r10 c000d0d8: 7c 09 50 40 cmplw r9,r10 c000d0dc: 40 82 ff f0 bne c000d0cc c000d0e0: 3f e0 c1 37 lis r31,-16073 c000d0e4: 3b ff b3 30 addi r31,r31,-19664 c000d0e8: 80 9f 00 14 lwz r4,20(r31) c000d0ec: 80 7f 00 10 lwz r3,16(r31) c000d0f0: 7c 84 40 10 subfc r4,r4,r8 c000d0f4: 80 bf 00 00 lwz r5,0(r31) c000d0f8: 80 df 00 04 lwz r6,4(r31) c000d0fc: 7c 63 49 10 subfe r3,r3,r9 c000d100: 48 00 37 85 bl c0010884 c000d104: 81 3f 00 08 lwz r9,8(r31) c000d108: 35 49 ff e0 addic. 
r10,r9,-32 c000d10c: 41 80 00 20 blt c000d12c c000d110: 80 01 00 14 lwz r0,20(r1) c000d114: 7c 83 50 30 slw r3,r4,r10 c000d118: 83 e1 00 0c lwz r31,12(r1) c000d11c: 38 80 00 00 li r4,0 c000d120: 7c 08 03 a6 mtlr r0 c000d124: 38 21 00 10 addi r1,r1,16 c000d128: 4e 80 00 20 blr c000d12c: 80 01 00 14 lwz r0,20(r1) c000d130: 54 8a f8 7e srwi r10,r4,1 c000d134: 21 09 00 1f subfic r8,r9,31 c000d138: 83 e1 00 0c lwz r31,12(r1) c000d13c: 7c 63 48 30 slw r3,r3,r9 c000d140: 7d 4a 44 30 srw r10,r10,r8 c000d144: 7c 84 48 30 slw r4,r4,r9 c000d148: 7d 43 1b 78 or r3,r10,r3 c000d14c: 7c 08 03 a6 mtlr r0 c000d150: 38 21 00 10 addi r1,r1,16 c000d154: 4e 80 00 20 blr c0010884 : c0010884: 2c 06 00 00 cmpwi r6,0 c0010888: 2c 83 00 00 cmpwi cr1,r3,0 c001088c: 7c 8a 23 78 mr r10,r4 c0010890: 7c 84 28 16 mulhwu r4,r4,r5 c0010894: 41 82 00 14 beq c00108a8 c0010898: 7c 0a 30 16 mulhwu r0,r10,r6 c001089c: 7c ea 29 d6 mullw r7,r10,r5 c00108a0: 7c e0 38 14 addc r7,r0,r7 c00108a4: 7c 84 01 94 addze r4,r4 c00108a8: 4d 86 00 20 beqlr cr1 c00108ac: 7d 23 29 d6 mullw r9,r3,r5 c00108b0: 7d 43 28 16 mulhwu r10,r3,r5 c00108b4: 41 82 00 18 beq c00108cc c00108b8: 7c 03 31 d6 mullw r0,r3,r6 c00108bc: 7d 03 30 16 mulhwu r8,r3,r6 c00108c0: 7c e0 38 14 addc r7,r0,r7 c00108c4: 7c 84 41 14 adde r4,r4,r8 c00108c8: 7d 4a 01 94 addze r10,r10 c00108cc: 7c 84 48 14 addc r4,r4,r9 c00108d0: 7c 6a 01 94 addze r3,r10 c00108d4: 4e 80 00 20 blr Signed-off-by: Christophe Leroy Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/f29e473c193c87bdbd36b209dfdee99d2f0c60dc.1733566130.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/time.h | 2 +- arch/powerpc/kernel/misc_32.S | 26 -------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 221c8f8ff89b..9bdd8080299b 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -86,7 +86,7 @@ static inline unsigned long 
tb_ticks_since(unsigned long tstamp) #define mulhdu(x,y) \ ({unsigned long z; asm ("mulhdu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;}) #else -extern u64 mulhdu(u64, u64); +#define mulhdu(x, y) mul_u64_u64_shr(x, y, 64) #endif extern void div128_by_32(u64 dividend_high, u64 dividend_low, diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 033cd00aa0fc..acb727f54e9d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -27,32 +27,6 @@ .text -/* - * This returns the high 64 bits of the product of two 64-bit numbers. - */ -_GLOBAL(mulhdu) - cmpwi r6,0 - cmpwi cr1,r3,0 - mr r10,r4 - mulhwu r4,r4,r5 - beq 1f - mulhwu r0,r10,r6 - mullw r7,r10,r5 - addc r7,r0,r7 - addze r4,r4 -1: beqlr cr1 /* all done if high part of A is 0 */ - mullw r9,r3,r5 - mulhwu r10,r3,r5 - beq 2f - mullw r0,r3,r6 - mulhwu r8,r3,r6 - addc r7,r0,r7 - adde r4,r4,r8 - addze r10,r10 -2: addc r4,r4,r9 - addze r3,r10 - blr - /* * reloc_got2 runs through the .got2 section adding an offset * to each entry. From 6dca1d3af16a82552294596b66fee9e13eed0795 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 29 Nov 2024 18:33:35 +0100 Subject: [PATCH 02/19] powerpc/xmon: Use str_yes_no() helper in dump_one_paca() Remove hard-coded strings by using the str_yes_no() helper function. Signed-off-by: Thorsten Blum Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241129173337.57890-2-thorsten.blum@linux.dev --- arch/powerpc/xmon/xmon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f4e841a36458..268859e4df87 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2623,9 +2623,9 @@ static void dump_one_paca(int cpu) printf("paca for cpu 0x%x @ %px:\n", cpu, p); - printf(" %-*s = %s\n", 25, "possible", cpu_possible(cpu) ? "yes" : "no"); - printf(" %-*s = %s\n", 25, "present", cpu_present(cpu) ? 
"yes" : "no"); - printf(" %-*s = %s\n", 25, "online", cpu_online(cpu) ? "yes" : "no"); + printf(" %-*s = %s\n", 25, "possible", str_yes_no(cpu_possible(cpu))); + printf(" %-*s = %s\n", 25, "present", str_yes_no(cpu_present(cpu))); + printf(" %-*s = %s\n", 25, "online", str_yes_no(cpu_online(cpu))); #define DUMP(paca, name, format) \ printf(" %-*s = "format"\t(0x%lx)\n", 25, #name, 18, paca->name, \ From 3a7a53c8d4813ef510a731f529b8c58208ab8896 Mon Sep 17 00:00:00 2001 From: Zhu Jun Date: Wed, 4 Dec 2024 00:01:49 -0800 Subject: [PATCH 03/19] selftests/powerpc: Fix typo in test-vphn.c The word 'accross' is wrong, so fix it. Signed-off-by: Zhu Jun Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241204080149.11759-1-zhujun2@cmss.chinamobile.com --- tools/testing/selftests/powerpc/vphn/test-vphn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c index 81d3069ffb84..f348f54914a9 100644 --- a/tools/testing/selftests/powerpc/vphn/test-vphn.c +++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c @@ -275,7 +275,7 @@ static struct test { } }, { - /* Parse a 32-bit value split accross two consecutives 64-bit + /* Parse a 32-bit value split across two consecutives 64-bit * input values. */ "vphn: 16-bit value followed by 2 x 32-bit values", From 5731d41af924b764f32532d39d37a15f669c1e01 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 10 Dec 2024 16:40:54 +1100 Subject: [PATCH 04/19] cxl: Deprecate driver The cxl driver is no longer actively maintained and we intend to remove it in a future kernel release. cxl has received minimal maintenance for several years, and is not supported on the Power10 processor. We aren't aware of any users who are likely to be using recent kernels. 
Change its MAINTAINERS status to obsolete, update the sysfs ABI documentation accordingly, add a warning message on device probe, change the Kconfig options to label it as deprecated, and don't build it by default. Signed-off-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241210054055.144813-2-ajd@linux.ibm.com --- Documentation/ABI/{testing => obsolete}/sysfs-class-cxl | 3 +++ MAINTAINERS | 4 ++-- drivers/misc/cxl/Kconfig | 6 ++++-- drivers/misc/cxl/of.c | 2 ++ drivers/misc/cxl/pci.c | 2 ++ 5 files changed, 13 insertions(+), 4 deletions(-) rename Documentation/ABI/{testing => obsolete}/sysfs-class-cxl (99%) diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/obsolete/sysfs-class-cxl similarity index 99% rename from Documentation/ABI/testing/sysfs-class-cxl rename to Documentation/ABI/obsolete/sysfs-class-cxl index cfc48a87706b..8cba1b626985 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/obsolete/sysfs-class-cxl @@ -1,3 +1,6 @@ +The cxl driver is no longer maintained, and will be removed from the kernel in +the near future. + Please note that attributes that are shared between devices are stored in the directory pointed to by the symlink device/. 
For example, the real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is diff --git a/MAINTAINERS b/MAINTAINERS index 17daa9ee9384..1737a8ff4f2b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6228,8 +6228,8 @@ CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER M: Frederic Barrat M: Andrew Donnellan L: linuxppc-dev@lists.ozlabs.org -S: Supported -F: Documentation/ABI/testing/sysfs-class-cxl +S: Obsolete +F: Documentation/ABI/obsolete/sysfs-class-cxl F: Documentation/arch/powerpc/cxl.rst F: arch/powerpc/platforms/powernv/pci-cxl.c F: drivers/misc/cxl/ diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index 5efc4151bf58..15307f5e4307 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -9,11 +9,13 @@ config CXL_BASE select PPC_64S_HASH_MMU config CXL - tristate "Support for IBM Coherent Accelerators (CXL)" + tristate "Support for IBM Coherent Accelerators (CXL) (DEPRECATED)" depends on PPC_POWERNV && PCI_MSI && EEH select CXL_BASE - default m help + The cxl driver is deprecated and will be removed in a future + kernel release. + Select this option to enable driver support for IBM Coherent Accelerators (CXL). CXL is otherwise known as Coherent Accelerator Processor Interface (CAPI). 
CAPI allows accelerators in FPGAs to be diff --git a/drivers/misc/cxl/of.c b/drivers/misc/cxl/of.c index cf6bd8a43056..e26ee85279fa 100644 --- a/drivers/misc/cxl/of.c +++ b/drivers/misc/cxl/of.c @@ -295,6 +295,8 @@ int cxl_of_probe(struct platform_device *pdev) int ret; int slice = 0, slice_ok = 0; + dev_err_once(&pdev->dev, "DEPRECATION: cxl is deprecated and will be removed in a future kernel release\n"); + pr_devel("in %s\n", __func__); np = pdev->dev.of_node; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 3d52f9b92d0d..92bf7c5c7b35 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1726,6 +1726,8 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; + dev_err_once(&dev->dev, "DEPRECATED: cxl is deprecated and will be removed in a future kernel release\n"); + if (cxl_pci_is_vphb_device(dev)) { dev_dbg(&dev->dev, "cxl_init_adapter: Ignoring cxl vphb device\n"); return -ENODEV; From f117051514c33c43b7e0c517e0ae9e0189e884da Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 10 Dec 2024 16:40:55 +1100 Subject: [PATCH 05/19] scsi/cxlflash: Deprecate driver We intend to remove the cxlflash driver in an upcoming release. It is already marked as Obsolete in MAINTAINERS. The cxlflash driver has received minimal maintenance for some time, and the CAPI Flash hardware that uses it is no longer commercially available. Add a warning message on probe and change Kconfig to label the driver as deprecated and not build the driver by default. 
Signed-off-by: Andrew Donnellan Reviewed-by: Frederic Barrat Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241210054055.144813-3-ajd@linux.ibm.com --- drivers/scsi/cxlflash/Kconfig | 6 ++++-- drivers/scsi/cxlflash/main.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/cxlflash/Kconfig b/drivers/scsi/cxlflash/Kconfig index 5533bdcb0458..c424d36e89a6 100644 --- a/drivers/scsi/cxlflash/Kconfig +++ b/drivers/scsi/cxlflash/Kconfig @@ -4,10 +4,12 @@ # config CXLFLASH - tristate "Support for IBM CAPI Flash" + tristate "Support for IBM CAPI Flash (DEPRECATED)" depends on PCI && SCSI && (CXL || OCXL) && EEH select IRQ_POLL - default m help + The cxlflash driver is deprecated and will be removed in a future + kernel release. + Allows CAPI Accelerated IO to Flash If unsure, say N. diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 60d62b93d624..62806f5e32e6 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -3651,6 +3651,8 @@ static int cxlflash_probe(struct pci_dev *pdev, int rc = 0; int k; + dev_err_once(&pdev->dev, "DEPRECATION: cxlflash is deprecated and will be removed in a future kernel release\n"); + dev_dbg(&pdev->dev, "%s: Found CXLFLASH with IRQ: %d\n", __func__, pdev->irq); From 34064c8267a61063d684408db6ae78b571a9999d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 18 Nov 2024 14:31:03 +0200 Subject: [PATCH 06/19] powerpc/8xx: Drop legacy-of-mm-gpiochip.h header Remove legacy-of-mm-gpiochip.h header file. The above mentioned file provides an OF API that's deprecated. There are no agnostic alternatives to it, so we have to open code the logic which was hidden behind of_mm_gpiochip_add_data(). Note, most of the GPIO drivers use their own labeling schemas and resource retrieval, so only a few would gain from the code deduplication; whenever an alternative appears we can move the drivers again to use that one.
As a side effect this change fixes a potential memory leak on an error path, if of_mm_gpiochip_add_data() fails. Signed-off-by: Andy Shevchenko Reviewed-by: Christophe Leroy Acked-by: Christophe Leroy Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241118123254.620519-1-andriy.shevchenko@linux.intel.com --- arch/powerpc/platforms/8xx/cpm1.c | 119 +++++++++++++++--------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index b24d4102fbf6..1dc095ad48fc 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -45,7 +45,7 @@ #include #ifdef CONFIG_8xx_GPIO -#include +#include #endif #define CPM_MAP_SIZE (0x4000) @@ -376,7 +376,8 @@ int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode) #ifdef CONFIG_8xx_GPIO struct cpm1_gpio16_chip { - struct of_mm_gpio_chip mm_gc; + struct gpio_chip gc; + void __iomem *regs; spinlock_t lock; /* shadowed data register to clear/set bits safely */ @@ -386,19 +387,17 @@ struct cpm1_gpio16_chip { int irq[16]; }; -static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc) +static void cpm1_gpio16_save_regs(struct cpm1_gpio16_chip *cpm1_gc) { - struct cpm1_gpio16_chip *cpm1_gc = - container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; cpm1_gc->cpdata = in_be16(&iop->dat); } static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; u16 pin_mask; pin_mask = 1 << (15 - gpio); @@ -406,11 +405,9 @@ static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio) return !!(in_be16(&iop->dat) & pin_mask); } -static void __cpm1_gpio16_set(struct 
of_mm_gpio_chip *mm_gc, u16 pin_mask, - int value) +static void __cpm1_gpio16_set(struct cpm1_gpio16_chip *cpm1_gc, u16 pin_mask, int value) { - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; if (value) cpm1_gc->cpdata |= pin_mask; @@ -422,38 +419,35 @@ static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask, static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); unsigned long flags; u16 pin_mask = 1 << (15 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); - __cpm1_gpio16_set(mm_gc, pin_mask, value); + __cpm1_gpio16_set(cpm1_gc, pin_mask, value); spin_unlock_irqrestore(&cpm1_gc->lock, flags); } static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); return cpm1_gc->irq[gpio] ? 
: -ENXIO; } static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; unsigned long flags; u16 pin_mask = 1 << (15 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); setbits16(&iop->dir, pin_mask); - __cpm1_gpio16_set(mm_gc, pin_mask, val); + __cpm1_gpio16_set(cpm1_gc, pin_mask, val); spin_unlock_irqrestore(&cpm1_gc->lock, flags); @@ -462,9 +456,8 @@ static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport16 __iomem *iop = mm_gc->regs; + struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport16 __iomem *iop = cpm1_gc->regs; unsigned long flags; u16 pin_mask = 1 << (15 - gpio); @@ -481,11 +474,10 @@ int cpm1_gpiochip_add16(struct device *dev) { struct device_node *np = dev->of_node; struct cpm1_gpio16_chip *cpm1_gc; - struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; u16 mask; - cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL); if (!cpm1_gc) return -ENOMEM; @@ -499,10 +491,8 @@ int cpm1_gpiochip_add16(struct device *dev) cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++); } - mm_gc = &cpm1_gc->mm_gc; - gc = &mm_gc->gc; - - mm_gc->save_regs = cpm1_gpio16_save_regs; + gc = &cpm1_gc->gc; + gc->base = -1; gc->ngpio = 16; gc->direction_input = cpm1_gpio16_dir_in; gc->direction_output = cpm1_gpio16_dir_out; @@ -512,30 +502,39 @@ int cpm1_gpiochip_add16(struct device *dev) gc->parent = dev; gc->owner = THIS_MODULE; - return 
of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); + gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np); + if (!gc->label) + return -ENOMEM; + + cpm1_gc->regs = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(cpm1_gc->regs)) + return PTR_ERR(cpm1_gc->regs); + + cpm1_gpio16_save_regs(cpm1_gc); + + return devm_gpiochip_add_data(dev, gc, cpm1_gc); } struct cpm1_gpio32_chip { - struct of_mm_gpio_chip mm_gc; + struct gpio_chip gc; + void __iomem *regs; spinlock_t lock; /* shadowed data register to clear/set bits safely */ u32 cpdata; }; -static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc) +static void cpm1_gpio32_save_regs(struct cpm1_gpio32_chip *cpm1_gc) { - struct cpm1_gpio32_chip *cpm1_gc = - container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; cpm1_gc->cpdata = in_be32(&iop->dat); } static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; u32 pin_mask; pin_mask = 1 << (31 - gpio); @@ -543,11 +542,9 @@ static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio) return !!(in_be32(&iop->dat) & pin_mask); } -static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask, - int value) +static void __cpm1_gpio32_set(struct cpm1_gpio32_chip *cpm1_gc, u32 pin_mask, int value) { - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; if (value) cpm1_gc->cpdata |= pin_mask; @@ -559,30 +556,28 @@ static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask, static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) { - struct of_mm_gpio_chip *mm_gc = 
to_of_mm_gpio_chip(gc); - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); unsigned long flags; u32 pin_mask = 1 << (31 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); - __cpm1_gpio32_set(mm_gc, pin_mask, value); + __cpm1_gpio32_set(cpm1_gc, pin_mask, value); spin_unlock_irqrestore(&cpm1_gc->lock, flags); } static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; unsigned long flags; u32 pin_mask = 1 << (31 - gpio); spin_lock_irqsave(&cpm1_gc->lock, flags); setbits32(&iop->dir, pin_mask); - __cpm1_gpio32_set(mm_gc, pin_mask, val); + __cpm1_gpio32_set(cpm1_gc, pin_mask, val); spin_unlock_irqrestore(&cpm1_gc->lock, flags); @@ -591,9 +586,8 @@ static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio) { - struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); - struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc); - struct cpm_ioport32b __iomem *iop = mm_gc->regs; + struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); + struct cpm_ioport32b __iomem *iop = cpm1_gc->regs; unsigned long flags; u32 pin_mask = 1 << (31 - gpio); @@ -610,19 +604,16 @@ int cpm1_gpiochip_add32(struct device *dev) { struct device_node *np = dev->of_node; struct cpm1_gpio32_chip *cpm1_gc; - struct of_mm_gpio_chip *mm_gc; struct gpio_chip *gc; - cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL); + cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL); if (!cpm1_gc) return -ENOMEM; spin_lock_init(&cpm1_gc->lock); - mm_gc = &cpm1_gc->mm_gc; - gc = &mm_gc->gc; - - mm_gc->save_regs = 
cpm1_gpio32_save_regs; + gc = &cpm1_gc->gc; + gc->base = -1; gc->ngpio = 32; gc->direction_input = cpm1_gpio32_dir_in; gc->direction_output = cpm1_gpio32_dir_out; @@ -631,7 +622,17 @@ int cpm1_gpiochip_add32(struct device *dev) gc->parent = dev; gc->owner = THIS_MODULE; - return of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc); + gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np); + if (!gc->label) + return -ENOMEM; + + cpm1_gc->regs = devm_of_iomap(dev, np, 0, NULL); + if (IS_ERR(cpm1_gc->regs)) + return PTR_ERR(cpm1_gc->regs); + + cpm1_gpio32_save_regs(cpm1_gc); + + return devm_gpiochip_add_data(dev, gc, cpm1_gc); } #endif /* CONFIG_8xx_GPIO */ From 026ac4dda8f666f737b375731e30ef8f5698b215 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 16 Dec 2024 21:32:55 +0530 Subject: [PATCH 07/19] selftest/powerpc/ptrace/core-pkey: Remove duplicate macros ./powerpc/ptrace/Makefile includes flags.mk. In flags.mk, -I$(selfdir)/powerpc/include is always included as part of CFLAGS. So it will pick up the "pkeys.h" defined in powerpc/include. The core-pkey.c test has a couple of macros defined which are part of the "pkeys.h" header file.
Remove those duplicates and include "pkeys.h" Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241216160257.87252-1-maddy@linux.ibm.com --- .../selftests/powerpc/ptrace/core-pkey.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f6da4cb30cd6..31c9bf6d95db 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -16,14 +16,7 @@ #include #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif +#include "pkeys.h" #ifndef NT_PPC_PKEY #define NT_PPC_PKEY 0x110 @@ -61,16 +54,6 @@ struct shared_info { time_t core_time; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - -static int sys_pkey_free(int pkey) -{ - return syscall(__NR_pkey_free, pkey); -} - static int increase_core_file_limit(void) { struct rlimit rlim; From b0e1b95b1597ad3d87ff91d52f6b67cc9423c31e Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 16 Dec 2024 21:32:56 +0530 Subject: [PATCH 08/19] selftest/powerpc/ptrace/ptrace-pkey: Remove duplicate macros ./powerpc/ptrace/Makefile includes flags.mk. In flags.mk, -I$(selfdir)/powerpc/include is always included as part of CFLAGS. So it will pick up the "pkeys.h" defined in powerpc/include. ptrace-pkey.c test has macros defined which are part of "pkeys.h" header file. 
Remove those duplicates and include "pkeys.h" Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241216160257.87252-2-maddy@linux.ibm.com --- .../testing/selftests/powerpc/ptrace/ptrace-pkey.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index d89474377f11..6893ed096457 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -7,14 +7,7 @@ */ #include "ptrace.h" #include "child.h" - -#ifndef __NR_pkey_alloc -#define __NR_pkey_alloc 384 -#endif - -#ifndef __NR_pkey_free -#define __NR_pkey_free 385 -#endif +#include "pkeys.h" #ifndef NT_PPC_PKEY #define NT_PPC_PKEY 0x110 @@ -61,11 +54,6 @@ struct shared_info { unsigned long invalid_uamor; }; -static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) -{ - return syscall(__NR_pkey_alloc, flags, init_access_rights); -} - static int child(struct shared_info *info) { unsigned long reg; From 65f5038352e8f635fb827f7482f1d08fae4d16bf Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 16 Dec 2024 21:32:57 +0530 Subject: [PATCH 09/19] selftest/powerpc/ptrace: Cleanup duplicate macro definitions Both core-pkey.c and ptrace-pkey.c tests have similar macro definitions, move them to "pkeys.h" and remove the macro definitions from the C file. 
Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241216160257.87252-3-maddy@linux.ibm.com --- tools/testing/selftests/powerpc/include/pkeys.h | 8 ++++++++ tools/testing/selftests/powerpc/ptrace/core-pkey.c | 12 ------------ tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 12 ------------ 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h index 51729d9a7111..3a0129467de6 100644 --- a/tools/testing/selftests/powerpc/include/pkeys.h +++ b/tools/testing/selftests/powerpc/include/pkeys.h @@ -35,10 +35,18 @@ #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + #define PKEY_BITS_PER_PKEY 2 #define NR_PKEYS 32 #define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1) +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + inline unsigned long pkeyreg_get(void) { return mfspr(SPRN_AMR); diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index 31c9bf6d95db..f061434af452 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -18,18 +18,6 @@ #include "child.h" #include "pkeys.h" -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) - #define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */ static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern"; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index 
6893ed096457..fc633014424f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -9,18 +9,6 @@ #include "child.h" #include "pkeys.h" -#ifndef NT_PPC_PKEY -#define NT_PPC_PKEY 0x110 -#endif - -#ifndef PKEY_DISABLE_EXECUTE -#define PKEY_DISABLE_EXECUTE 0x4 -#endif - -#define AMR_BITS_PER_PKEY 2 -#define PKEY_REG_BITS (sizeof(u64) * 8) -#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) - static const char user_read[] = "[User Read (Running)]"; static const char user_write[] = "[User Write (Running)]"; static const char ptrace_read_running[] = "[Ptrace Read (Running)]"; From e834166822a3c9fb403411c898367df8dabf973c Mon Sep 17 00:00:00 2001 From: Luis Felipe Hernandez Date: Tue, 17 Dec 2024 21:55:41 -0500 Subject: [PATCH 10/19] macintosh: declare ctl_table as const MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 7abc9b53bd51 ("sysctl: allow registration of const struct ctl_table"), the sysctl registration API allows struct ctl_table variables to be placed into read-only memory. mac_hid_files is registered as a sysctl table and should be treated as read-only. By declaring the mac_hid_files structure as const, we ensure that it cannot be accidentally modified. This change improves safety. Suggested-by: Thomas Weißschuh Suggested-by: Ricardo B. Marliere Reviewed-by: Ricardo B. 
Marliere Signed-off-by: Luis Felipe Hernandez Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241217-constify_ctl_table-v1-1-402ebceaeb8e@gmail.com --- drivers/macintosh/mac_hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index b461b1bed25b..369d72f59b3c 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c @@ -215,7 +215,7 @@ static int mac_hid_toggle_emumouse(const struct ctl_table *table, int write, } /* file(s) in /proc/sys/dev/mac_hid */ -static struct ctl_table mac_hid_files[] = { +static const struct ctl_table mac_hid_files[] = { { .procname = "mouse_button_emulation", .data = &mouse_emulate_buttons, From f66dbe43798fc97e8c0e6d9b86f1aa923ef523fa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 16 Dec 2024 23:17:06 +1100 Subject: [PATCH 11/19] powerpc/64: Use get_user() in start_thread() For ELFv1 binaries (big endian), the ELF entry point isn't the address of the first instruction, instead it points to the function descriptor for the entry point. The address of the first instruction is in the function descriptor. That means the kernel has to fetch the address of the first instruction from user memory. Because start_thread() uses __get_user(), which has no access_ok() checks, it looks like a malicious ELF binary could be crafted to point the entry point address at kernel memory. The kernel would load 8 bytes from kernel memory into the NIP and then start the process, it would typically crash, but a debugger could observe the NIP value which would be the result of reading from kernel memory. However that's NOT possible, because there is a check in load_elf_binary() that ensures the ELF entry point is < TASK_SIZE (look for BAD_ADDR(elf_entry)). However it's fragile for start_thread() to rely on a check elsewhere, even if the ELF parser is unlikely to ever drop the check that elf_entry is a user address. 
Make it more robust by using get_user(), which checks that the address points at userspace before doing the load. If the address doesn't point at userspace it will just set the result to zero, and the userspace program will crash at zero (which is fine because it's self-inflicted). Note that it's also possible for a malicious binary to have a valid ELF entry address, but with the first instruction address pointing into the kernel. However that's OK, because it is blocked by the MMU, just like any other attempt to jump into the kernel from userspace. Reported-by: Linus Torvalds Signed-off-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241216121706.26790-1-mpe@ellerman.id.au --- arch/powerpc/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 7b739b9a91ab..ef91f71e07c4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1960,8 +1960,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * address of _start and the second entry is the TOC * value we need to use. */ - __get_user(entry, (unsigned long __user *)start); - __get_user(toc, (unsigned long __user *)start+1); + get_user(entry, (unsigned long __user *)start); + get_user(toc, (unsigned long __user *)start+1); /* Check whether the e_entry function descriptor entries * need to be relocated before we can use them. From 9fa9712644e04c4fd4de7e2d999edde3c9316823 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 14 Dec 2024 12:09:31 +0100 Subject: [PATCH 12/19] powerpc/vdso: Mark the vDSO code read-only after init VDSO text is fixed-up during init so it can't be const, but it can be read-only after init. 
Do the same as x86 in commit 018ef8dcf3de ("x86/vdso: Mark the vDSO code read-only after init") and arm in commit 11bf9b865898 ("ARM/vdso: Mark the vDSO code read-only after init"), move it into ro_after_init section. Signed-off-by: Christophe Leroy Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/e9892d288b646cbdfeef0b2b73edbaf6d3c6cabe.1734174500.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/vdso32_wrapper.S | 2 +- arch/powerpc/kernel/vdso64_wrapper.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 10f92f265d51..20bca3548b44 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -2,7 +2,7 @@ #include #include - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 839d1a61411d..1912936fa227 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -2,7 +2,7 @@ #include #include - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso64_start, vdso64_end .balign PAGE_SIZE From d629d7a8efc33d05d62f4805c0ffb44727e3d99f Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 17 Dec 2024 13:16:40 +0530 Subject: [PATCH 13/19] powerpc/book3s64/hugetlb: Fix disabling hugetlb when fadump is active Commit 8597538712eb ("powerpc/fadump: Do not use hugepages when fadump is active") disabled hugetlb support when fadump is active by returning early from hugetlbpage_init():arch/powerpc/mm/hugetlbpage.c and not populating hpage_shift/HPAGE_SHIFT. Later, commit 2354ad252b66 ("powerpc/mm: Update default hugetlb size early") moved the allocation of hpage_shift/HPAGE_SHIFT to early boot, which inadvertently re-enabled hugetlb support when fadump is active. Fix this by implementing hugepages_supported() on powerpc. 
This ensures that disabling hugetlb for the fadump kernel is independent of hpage_shift/HPAGE_SHIFT. Fixes: 2354ad252b66 ("powerpc/mm: Update default hugetlb size early") Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Sourabh Jain Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241217074640.1064510-1-sourabhjain@linux.ibm.com --- arch/powerpc/include/asm/hugetlb.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 18a3028ac3b6..dad2e7980f24 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -15,6 +15,15 @@ extern bool hugetlb_disabled; +static inline bool hugepages_supported(void) +{ + if (hugetlb_disabled) + return false; + + return HPAGE_SHIFT != 0; +} +#define hugepages_supported hugepages_supported + void __init hugetlbpage_init_defaultsize(void); int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, From 00199ed6f2ca6601b2c5856fac64132303d9437a Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Sun, 17 Nov 2024 00:53:05 +0530 Subject: [PATCH 14/19] powerpc: Add preempt lazy support Define preempt lazy bit for Powerpc. Use bit 9 which is free and within 16 bit range of NEED_RESCHED, so compiler can issue single andi. Since Powerpc doesn't use the generic entry/exit, add lazy check at exit to user. CONFIG_PREEMPTION is defined for lazy/full/rt so use it for return to kernel. Ran a few benchmarks and db workload on Power10. Performance is close to preempt=none/voluntary. Since Powerpc systems can have large core count and large memory, preempt lazy is going to be helpful in avoiding soft lockup issues. 
Reviewed-by: Sebastian Andrzej Siewior Reviewed-by: Ankur Arora Signed-off-by: Shrikanth Hegde Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241116192306.88217-2-sshegde@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/thread_info.h | 9 ++++++--- arch/powerpc/kernel/interrupt.c | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a0ce777f9706..db9f7b2d07bf 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -145,6 +145,7 @@ config PPC select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API + select ARCH_HAS_PREEMPT_LAZY select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 6ebca2996f18..2785c7462ebf 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -103,6 +103,7 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ +#define TIF_NEED_RESCHED_LAZY 9 /* Scheduler driven lazy preemption */ #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ @@ -122,6 +123,7 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACE (1<msr & MSR_EE)); again: - if (IS_ENABLED(CONFIG_PREEMPT)) { + if (IS_ENABLED(CONFIG_PREEMPTION)) { /* Return to preemptible kernel context */ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) From eda86a41a1c7700757c9217f74b9d57431c3e5f4 Mon Sep 17 00:00:00 2001 From: Shrikanth Hegde Date: Sun, 17 Nov 2024 00:53:06 +0530 Subject: [PATCH 15/19] 
powerpc: Large user copy aware of full:rt:lazy preemption Large user copy_to/from (more than 16 bytes) uses vmx instructions to speed things up. Once the copy is done, it makes sense to try to schedule as soon as possible for preemptible kernels. So do this for preempt=full/lazy and rt kernel. Not checking for lazy bit here, since it could lead to unnecessary context switches. Suggested-by: Sebastian Andrzej Siewior Signed-off-by: Shrikanth Hegde Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241116192306.88217-3-sshegde@linux.ibm.com --- arch/powerpc/lib/vmx-helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index d491da8d1838..58ed6bd613a6 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -45,7 +45,7 @@ int exit_vmx_usercopy(void) * set and we are preemptible. The hack here is to schedule a * decrementer to fire here and reschedule for us if necessary. */ - if (IS_ENABLED(CONFIG_PREEMPT) && need_resched()) + if (IS_ENABLED(CONFIG_PREEMPTION) && need_resched()) set_dec(1); return 0; } From 26bef359bc4f10747f8d0b3a7f3fe60ef99ce2c1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Fri, 20 Dec 2024 20:17:04 +0100 Subject: [PATCH 16/19] powerpc: Use str_on_off() helper in check_cache_coherency() Remove hard-coded strings by using the str_on_off() helper function.
Signed-off-by: Thorsten Blum Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241220191705.1446-2-thorsten.blum@linux.dev --- arch/powerpc/kernel/setup-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 6fa179448c33..f7d7a93f07fc 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -834,8 +834,8 @@ static int __init check_cache_coherency(void) if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR "kernel coherency:%s != device tree_coherency:%s\n", - KERNEL_COHERENCY ? "on" : "off", - devtree_coherency ? "on" : "off"); + str_on_off(KERNEL_COHERENCY), + str_on_off(devtree_coherency)); BUG(); } From 8f70caad82e9c088ed93b4fea48d941ab6441886 Mon Sep 17 00:00:00 2001 From: Gaurav Batra Date: Fri, 6 Dec 2024 15:00:39 -0600 Subject: [PATCH 17/19] powerpc/pseries/iommu: IOMMU incorrectly marks MMIO range in DDW Power Hypervisor can possibly allocate an MMIO window intersecting with the Dynamic DMA Window (DDW) range, which is over 32-bit addressing. These MMIO pages need to be marked as reserved so that IOMMU doesn't map DMA buffers in this range. The current code does not mark these pages correctly, which results in the LPAR hitting an Oops while booting.
The stack is at below BUG: Unable to handle kernel data access on read at 0xc00800005cd40000 Faulting instruction address: 0xc00000000005cdac Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: af_packet rfkill ibmveth(X) lpfc(+) nvmet_fc nvmet nvme_keyring crct10dif_vpmsum nvme_fc nvme_fabrics nvme_core be2net(+) nvme_auth rtc_generic nfsd auth_rpcgss nfs_acl lockd grace sunrpc fuse configfs ip_tables x_tables xfs libcrc32c dm_service_time ibmvfc(X) scsi_transport_fc vmx_crypto gf128mul crc32c_vpmsum dm_mirror dm_region_hash dm_log dm_multipath dm_mod sd_mod scsi_dh_emc scsi_dh_rdac scsi_dh_alua t10_pi crc64_rocksoft_generic crc64_rocksoft sg crc64 scsi_mod Supported: Yes, External CPU: 8 PID: 241 Comm: kworker/8:1 Kdump: loaded Not tainted 6.4.0-150600.23.14-default #1 SLE15-SP6 b44ee71c81261b9e4bab5e0cde1f2ed891d5359b Hardware name: IBM,9080-M9S POWER9 (raw) 0x4e2103 0xf000005 of:IBM,FW950.B0 (VH950_149) hv:phyp pSeries Workqueue: events work_for_cpu_fn NIP: c00000000005cdac LR: c00000000005e830 CTR: 0000000000000000 REGS: c00001400c9ff770 TRAP: 0300 Not tainted (6.4.0-150600.23.14-default) MSR: 800000000280b033 CR: 24228448 XER: 00000001 CFAR: c00000000005cdd4 DAR: c00800005cd40000 DSISR: 40000000 IRQMASK: 0 GPR00: c00000000005e830 c00001400c9ffa10 c000000001987d00 c00001400c4fe800 GPR04: 0000080000000000 0000000000000001 0000000004000000 0000000000800000 GPR08: 0000000004000000 0000000000000001 c00800005cd40000 ffffffffffffffff GPR12: 0000000084228882 c00000000a4c4f00 0000000000000010 0000080000000000 GPR16: c00001400c4fe800 0000000004000000 0800000000000000 c00000006088b800 GPR20: c00001401a7be980 c00001400eff3800 c000000002a2da68 000000000000002b GPR24: c0000000026793a8 c000000002679368 000000000000002a c0000000026793c8 GPR28: 000008007effffff 0000080000000000 0000000000800000 c00001400c4fe800 NIP [c00000000005cdac] iommu_table_reserve_pages+0xac/0x100 LR [c00000000005e830] 
iommu_init_table+0x80/0x1e0 Call Trace: [c00001400c9ffa10] [c00000000005e810] iommu_init_table+0x60/0x1e0 (unreliable) [c00001400c9ffa90] [c00000000010356c] iommu_bypass_supported_pSeriesLP+0x9cc/0xe40 [c00001400c9ffc30] [c00000000005c300] dma_iommu_dma_supported+0xf0/0x230 [c00001400c9ffcb0] [c00000000024b0c4] dma_supported+0x44/0x90 [c00001400c9ffcd0] [c00000000024b14c] dma_set_mask+0x3c/0x80 [c00001400c9ffd00] [c0080000555b715c] be_probe+0xc4/0xb90 [be2net] [c00001400c9ffdc0] [c000000000986f3c] local_pci_probe+0x6c/0x110 [c00001400c9ffe40] [c000000000188f28] work_for_cpu_fn+0x38/0x60 [c00001400c9ffe70] [c00000000018e454] process_one_work+0x314/0x620 [c00001400c9fff10] [c00000000018f280] worker_thread+0x2b0/0x620 [c00001400c9fff90] [c00000000019bb18] kthread+0x148/0x150 [c00001400c9fffe0] [c00000000000ded8] start_kernel_thread+0x14/0x18 There are 2 issues in the code 1. The index is "int" while the address is "unsigned long". This results in negative value when setting the bitmap. 2. The DMA offset is page shifted but the MMIO range is used as-is (64-bit address). MMIO address needs to be page shifted as well. 
Fixes: 3c33066a2190 ("powerpc/kernel/iommu: Add new iommu_table_in_use() helper") Signed-off-by: Gaurav Batra Reviewed-by: Nilay Shroff Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241206210039.93172-1-gbatra@linux.ibm.com --- arch/powerpc/kernel/iommu.c | 2 +- arch/powerpc/platforms/pseries/iommu.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 76381e14e800..0ebae6e4c19d 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -687,7 +687,7 @@ void iommu_table_clear(struct iommu_table *tbl) void iommu_table_reserve_pages(struct iommu_table *tbl, unsigned long res_start, unsigned long res_end) { - int i; + unsigned long i; WARN_ON_ONCE(res_end < res_start); /* diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 534cd159e9ab..29f1a0cc59cd 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1650,7 +1650,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, dynamic_addr, dynamic_len, page_shift, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(newtbl, pci->phb->node, start, end); + iommu_init_table(newtbl, pci->phb->node, + start >> page_shift, end >> page_shift); pci->table_group->tables[default_win_removed ? 
0 : 1] = newtbl; @@ -2065,7 +2066,9 @@ static long spapr_tce_create_table(struct iommu_table_group *table_group, int nu offset, 1UL << window_shift, IOMMU_PAGE_SHIFT_4K, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(tbl, pci->phb->node, start, end); + iommu_init_table(tbl, pci->phb->node, + start >> IOMMU_PAGE_SHIFT_4K, + end >> IOMMU_PAGE_SHIFT_4K); table_group->tables[0] = tbl; @@ -2136,7 +2139,7 @@ static long spapr_tce_create_table(struct iommu_table_group *table_group, int nu /* New table for using DDW instead of the default DMA window */ iommu_table_setparms_common(tbl, pci->phb->bus->number, create.liobn, win_addr, 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops); - iommu_init_table(tbl, pci->phb->node, start, end); + iommu_init_table(tbl, pci->phb->node, start >> page_shift, end >> page_shift); pci->table_group->tables[num] = tbl; set_iommu_table_base(&pdev->dev, tbl); From 200f22fa48a8c670a1ba66d18d810c51055e6ae9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 18 Dec 2024 22:31:59 +1100 Subject: [PATCH 18/19] powerpc/prom_init: Use IS_ENABLED() Use IS_ENABLED() for the device tree checks, so that more code is checked by the compiler without having to build all the different configurations. 
Suggested-by: Rob Herring Signed-off-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241218113159.422821-1-mpe@ellerman.id.au --- arch/powerpc/kernel/prom_init.c | 39 ++++++++++++--------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 8e776ba39497..57082fac4668 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2792,7 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node * Pegasos needs decimal IRQ 14/15, not hexadecimal @@ -2843,11 +2842,7 @@ static void __init fixup_device_tree_chrp(void) } } } -#else -#define fixup_device_tree_chrp() -#endif -#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; @@ -2887,11 +2882,7 @@ static void __init fixup_device_tree_pmac64(void) prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent", &parent, sizeof(parent)); } -#else -#define fixup_device_tree_pmac64() -#endif -#ifdef CONFIG_PPC_PMAC static void __init fixup_device_tree_pmac(void) { __be32 val = 1; @@ -2911,11 +2902,7 @@ static void __init fixup_device_tree_pmac(void) prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); } } -#else -static inline void fixup_device_tree_pmac(void) { } -#endif -#ifdef CONFIG_PPC_EFIKA /* * The MPC5200 FEC driver requires an phy-handle property to tell it how * to talk to the phy. 
If the phy-handle property is missing, then this @@ -3047,11 +3034,7 @@ static void __init fixup_device_tree_efika(void) /* Make sure ethernet phy-handle property exists */ fixup_device_tree_efika_add_phy(); } -#else -#define fixup_device_tree_efika() -#endif -#ifdef CONFIG_PPC_PASEMI_NEMO /* * CFE supplied on Nemo is broken in several ways, biggest * problem is that it reassigns ISA interrupts to unused mpic ints. @@ -3127,17 +3110,23 @@ static void __init fixup_device_tree_pasemi(void) prom_setprop(iob, name, "device_type", "isa", sizeof("isa")); } -#else /* !CONFIG_PPC_PASEMI_NEMO */ -static inline void fixup_device_tree_pasemi(void) { } -#endif static void __init fixup_device_tree(void) { - fixup_device_tree_chrp(); - fixup_device_tree_pmac(); - fixup_device_tree_pmac64(); - fixup_device_tree_efika(); - fixup_device_tree_pasemi(); + if (IS_ENABLED(CONFIG_PPC_CHRP)) + fixup_device_tree_chrp(); + + if (IS_ENABLED(CONFIG_PPC_PMAC)) + fixup_device_tree_pmac(); + + if (IS_ENABLED(CONFIG_PPC_PMAC) && IS_ENABLED(CONFIG_PPC64)) + fixup_device_tree_pmac64(); + + if (IS_ENABLED(CONFIG_PPC_EFIKA)) + fixup_device_tree_efika(); + + if (IS_ENABLED(CONFIG_PPC_PASEMI_NEMO)) + fixup_device_tree_pasemi(); } static void __init prom_find_boot_cpu(void) From 2bf66e66d2e6feece6175ec09ec590a0a8563bdd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 18 Dec 2024 22:43:47 +1100 Subject: [PATCH 19/19] selftests/powerpc: Fix argument order to timer_sub() Commit c814bf958926 ("powerpc/selftests: Use timersub() for gettimeofday()"), got the order of arguments to timersub() wrong, leading to a negative time delta being reported, eg: test: gettimeofday tags: git_version:v6.12-rc5-409-gdddf291c3030 time = -3.297781 success: gettimeofday The correct order is minuend, subtrahend, which in this case is end, start. 
Which gives: test: gettimeofday tags: git_version:v6.12-rc5-409-gdddf291c3030-dirty time = 3.300650 success: gettimeofday Fixes: c814bf958926 ("powerpc/selftests: Use timersub() for gettimeofday()") Signed-off-by: Michael Ellerman Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20241218114347.428108-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/benchmarks/gettimeofday.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c index 580fcac0a09f..b71ef8a493ed 100644 --- a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c +++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c @@ -20,7 +20,7 @@ static int test_gettimeofday(void) gettimeofday(&tv_end, NULL); } - timersub(&tv_start, &tv_end, &tv_diff); + timersub(&tv_end, &tv_start, &tv_diff); printf("time = %.6f\n", tv_diff.tv_sec + (tv_diff.tv_usec) * 1e-6);