diff --git a/.mailmap b/.mailmap index 225546cc8028..60a60d10ba7c 100644 --- a/.mailmap +++ b/.mailmap @@ -122,6 +122,8 @@ Henk Vergonet Henrik Kretzschmar Henrik Rydberg Herbert Xu +Huacai Chen +Huacai Chen Jacob Shin Jaegeuk Kim Jaegeuk Kim diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-client b/Documentation/ABI/testing/sysfs-class-rnbd-client index 00c0286733d4..2aa05b3e348e 100644 --- a/Documentation/ABI/testing/sysfs-class-rnbd-client +++ b/Documentation/ABI/testing/sysfs-class-rnbd-client @@ -66,7 +66,7 @@ Description: Expected format is the following:: The rnbd_server prepends the received from client with and tries to open the / block device. On success, - a /dev/rnbd device file, a /sys/block/rnbd_client/rnbd/ + a /dev/rnbd device file, a /sys/block/rnbd/ directory and an entry in /sys/class/rnbd-client/ctl/devices will be created. @@ -95,12 +95,12 @@ Description: Expected format is the following:: --------------------------------- After mapping, the device file can be found by: - o The symlink /sys/class/rnbd-client/ctl/devices/ + o The symlink /sys/class/rnbd-client/ctl/devices/@ points to /sys/block/. The last part of the symlink destination is the same as the device name. By extracting the last part of the path the path to the device /dev/ can be build. - * /dev/block/$(cat /sys/class/rnbd-client/ctl/devices//dev) + * /dev/block/$(cat /sys/class/rnbd-client/ctl/devices/@/dev) How to find the of the device is described on the next section. @@ -110,7 +110,7 @@ Date: Feb 2020 KernelVersion: 5.7 Contact: Jack Wang Danil Kipnis Description: For each device mapped on the client a new symbolic link is created as - /sys/class/rnbd-client/ctl/devices/, which points + /sys/class/rnbd-client/ctl/devices/@, which points to the block device created by rnbd (/sys/block/rnbd/). The of each device is created as follows: diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-server b/Documentation/ABI/testing/sysfs-class-rnbd-server index ba60a90c0e45..6c5996cd7cfb 100644 --- a/Documentation/ABI/testing/sysfs-class-rnbd-server +++ b/Documentation/ABI/testing/sysfs-class-rnbd-server @@ -48,3 +48,11 @@ Date: Feb 2020 KernelVersion: 5.7 Contact: Jack Wang Danil Kipnis Description: Contains the device access mode: ro, rw or migration. + +What: /sys/class/rnbd-server/ctl/devices//sessions//force_close +Date: Nov 2020 +KernelVersion: 5.10 +Contact: Jack Wang Danil Kipnis +Description: Write "1" to the file to close the device on server side. Please + note that the client side device will not be closed, read or + write to the device will get -ENOTCONN. diff --git a/Documentation/ABI/testing/sysfs-firmware-lefi-boardinfo b/Documentation/ABI/testing/sysfs-firmware-lefi-boardinfo new file mode 100644 index 000000000000..5e3f6148c52e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-lefi-boardinfo @@ -0,0 +1,35 @@ +What: /sys/firmware/lefi/boardinfo +Date: October 2020 +Contact: Tiezhu Yang +Description: + Get mainboard and BIOS info easily on the Loongson platform, + this is useful to point out the current used mainboard type + and BIOS version when there exists problems related with + hardware or firmware. + + The related structures are already defined in the interface + specification about firmware and kernel which are common + requirement and specific for Loongson64, so only add a new + boardinfo.c file in arch/mips/loongson64. + + For example: + + [loongson@linux ~]$ cat /sys/firmware/lefi/boardinfo + Board Info + Manufacturer : LEMOTE + Board Name : LEMOTE-LS3A4000-7A1000-1w-V01-pc + Family : LOONGSON3 + + BIOS Info + Vendor : Kunlun + Version : Kunlun-A1901-V4.1.3-20200414093938 + ROM Size : 4 KB + Release Date : 2020-04-14 + + By the way, using dmidecode command can get the similar info if there + exists SMBIOS in firmware, but the fact is that there is no SMBIOS on + some machines, we can see nothing when execute dmidecode, like this: + + [root@linux loongson]# dmidecode + # dmidecode 2.12 + # No SMBIOS nor DMI entry point found, sorry. diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst index 6898aba9fb5c..49d8149f8d32 100644 --- a/Documentation/admin-guide/pstore-blk.rst +++ b/Documentation/admin-guide/pstore-blk.rst @@ -35,7 +35,7 @@ module parameters have priority over Kconfig. Here is an example for module parameters:: - pstore_blk.blkdev=179:7 pstore_blk.kmsg_size=64 + pstore_blk.blkdev=/dev/mmcblk0p7 pstore_blk.kmsg_size=64 best_effort=y The detail of each configurations may be of interest to you. @@ -151,10 +151,7 @@ otherwise KMSG_DUMP_MAX. Configurations for driver ------------------------- -Only a block device driver cares about these configurations. A block device -driver uses ``register_pstore_blk`` to register to pstore/blk. - -A non-block device driver uses ``register_pstore_device`` with +A device driver uses ``register_pstore_device`` with ``struct pstore_device_info`` to register to pstore/blk. .. kernel-doc:: fs/pstore/blk.c diff --git a/Documentation/devicetree/bindings/mips/mscc.txt b/Documentation/devicetree/bindings/mips/mscc.txt index bc817e984628..cc916eaeed0a 100644 --- a/Documentation/devicetree/bindings/mips/mscc.txt +++ b/Documentation/devicetree/bindings/mips/mscc.txt @@ -4,7 +4,7 @@ Boards with a SoC of the Microsemi MIPS family shall have the following properties: Required properties: -- compatible: "mscc,ocelot" +- compatible: "mscc,ocelot", "mscc,luton", "mscc,serval" or "mscc,jr2" * Other peripherals: diff --git a/Documentation/devicetree/bindings/reset/brcm,bcm6345-reset.yaml b/Documentation/devicetree/bindings/reset/brcm,bcm6345-reset.yaml new file mode 100644 index 000000000000..560cf6522cb8 --- /dev/null +++ b/Documentation/devicetree/bindings/reset/brcm,bcm6345-reset.yaml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/reset/brcm,bcm6345-reset.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: BCM6345 reset controller + +description: This document describes the BCM6345 reset controller. + +maintainers: + - Álvaro Fernández Rojas + +properties: + compatible: + const: brcm,bcm6345-reset + + reg: + maxItems: 1 + + "#reset-cells": + const: 1 + +required: + - compatible + - reg + - "#reset-cells" + +additionalProperties: false + +examples: + - | + reset-controller@10000010 { + compatible = "brcm,bcm6345-reset"; + reg = <0x10000010 0x4>; + #reset-cells = <1>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 30d74c34e537..d3c67e99fe47 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7339,7 +7339,6 @@ F: drivers/staging/gasket/ GCC PLUGINS M: Kees Cook -R: Emese Revfy L: linux-hardening@vger.kernel.org S: Maintained F: Documentation/kbuild/gcc-plugins.rst @@ -8744,19 +8743,16 @@ F: include/uapi/rdma/ F: samples/bpf/ibumad_kern.c F: samples/bpf/ibumad_user.c -INGENIC JZ4780 DMA Driver -M: Zubair Lutfullah Kakakhel -S: Maintained -F: drivers/dma/dma-jz4780.c - INGENIC JZ4780 NAND DRIVER M: Harvey Hunt L: linux-mtd@lists.infradead.org +L: linux-mips@vger.kernel.org S: Maintained F: drivers/mtd/nand/raw/ingenic/ INGENIC JZ47xx SoCs M: Paul Cercueil +L: linux-mips@vger.kernel.org S: Maintained F: arch/mips/boot/dts/ingenic/ F: arch/mips/generic/board-ingenic.c @@ -9718,7 +9714,7 @@ F: arch/arm64/kvm/ F: include/kvm/arm_* KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips) -M: Huacai Chen +M: Huacai Chen M: Aleksandar Markovic L: linux-mips@vger.kernel.org L: kvm@vger.kernel.org @@ -11860,7 +11856,7 @@ F: drivers/*/*/*loongson2* F: drivers/*/*loongson2* MIPS/LOONGSON64 ARCHITECTURE -M: Huacai Chen +M: Huacai Chen M: Jiaxun Yang L: linux-mips@vger.kernel.org S: Maintained diff --git a/arch/Kconfig b/arch/Kconfig index 96992b01d806..d4bdc19ed3ad 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -486,6 +486,9 @@ config HAVE_ARCH_SECCOMP_FILTER - secure_computing return value is checked and a return value of -1 results in the system call being skipped immediately. - seccomp syscall wired up + - if !HAVE_SPARSE_SYSCALL_NR, have SECCOMP_ARCH_NATIVE, + SECCOMP_ARCH_NATIVE_NR, SECCOMP_ARCH_NATIVE_NAME defined. If + COMPAT is supported, have the SECCOMP_ARCH_COMPAT* defines too. config SECCOMP prompt "Enable seccomp to safely execute untrusted bytecode" @@ -514,6 +517,20 @@ config SECCOMP_FILTER See Documentation/userspace-api/seccomp_filter.rst for details. +config SECCOMP_CACHE_DEBUG + bool "Show seccomp filter cache status in /proc/pid/seccomp_cache" + depends on SECCOMP_FILTER && !HAVE_SPARSE_SYSCALL_NR + depends on PROC_FS + help + This enables the /proc/pid/seccomp_cache interface to monitor + seccomp cache data. The file format is subject to change. Reading + the file requires CAP_SYS_ADMIN. + + This option is for debugging only. Enabling presents the risk that + an adversary may be able to infer the seccomp filter logic. + + If unsure, say N. + config HAVE_ARCH_STACKLEAK bool help diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 807d7b9a1860..2592356e3215 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -62,6 +62,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SYSCALL_AUDIT 4 /* syscall audit active */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ #define TIF_MEMDIE 13 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 14 /* idle is polling for TIF_NEED_RESCHED */ @@ -71,6 +72,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define _TIF_NEED_RESCHED (1< + +#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_ARM +#define SECCOMP_ARCH_NATIVE_NR NR_syscalls +#define SECCOMP_ARCH_NATIVE_NAME "arm" + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 536b6b979f63..eb7ce2747eb0 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -126,6 +126,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, * thread information flags: * TIF_USEDFPU - FPU was used by this task this quantum (SMP) * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED + * + * Any bit in the range of 0..15 will cause do_work_pending() to be invoked. */ #define TIF_SIGPENDING 0 /* signal pending */ #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ @@ -135,6 +137,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ +#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -148,6 +151,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) /* Checks for any syscall work in entry-common.S */ @@ -158,7 +162,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, * Change these and you break ASM code in entry-common.S */ #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE) + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) #endif /* __KERNEL__ */ #endif /* __ASM_ARM_THREAD_INFO_H */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 271cb8a1eba1..77d16390a524 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -53,7 +53,7 @@ __ret_fast_syscall: cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + movs r1, r1, lsl #16 bne fast_work_pending @@ -90,7 +90,7 @@ __ret_fast_syscall: cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK + movs r1, r1, lsl #16 beq no_work_pending UNWIND(.fnend ) ENDPROC(ret_fast_syscall) @@ -131,7 +131,7 @@ ENTRY(ret_to_user_from_irq) cmp r2, #TASK_SIZE blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] - tst r1, #_TIF_WORK_MASK + movs r1, r1, lsl #16 bne slow_work_pending no_work_pending: asm_trace_hardirqs_on save = 0 diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index de1f20624be1..d0e898608d30 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -59,7 +59,7 @@ __irq_entry: get_thread_info tsk ldr r2, [tsk, #TI_FLAGS] - tst r2, #_TIF_WORK_MASK + movs r2, r2, lsl #16 beq 2f @ no work pending mov r0, #V7M_SCB_ICSR_PENDSVSET str r0, [r1, V7M_SCB_ICSR] @ raise PendSV diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 585edbfccf6d..9d2e916121be 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -655,7 +655,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) if (unlikely(!user_mode(regs))) return 0; local_irq_enable(); - if (thread_flags & _TIF_SIGPENDING) { + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { int restart = do_signal(regs, syscall); if (unlikely(restart)) { /* diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h index c36387170936..30256233788b 100644 --- a/arch/arm64/include/asm/seccomp.h +++ b/arch/arm64/include/asm/seccomp.h @@ -19,4 +19,13 @@ #include +#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_AARCH64 +#define SECCOMP_ARCH_NATIVE_NR NR_syscalls +#define SECCOMP_ARCH_NATIVE_NAME "aarch64" +#ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM +# define SECCOMP_ARCH_COMPAT_NR __NR_compat_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "arm" +#endif + #endif /* _ASM_SECCOMP_H */ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 2c8cce12f01f..6c06e04ae0dd 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -65,6 +65,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ #define TIF_CHECK_32BIT_AFFINITY 6 /* Check thread affinity for asymmetric AArch32 */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -95,10 +96,12 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_CHECK_32BIT_AFFINITY (1 << TIF_CHECK_32BIT_AFFINITY) #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ + _TIF_NOTIFY_SIGNAL | \ _TIF_CHECK_32BIT_AFFINITY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a6058fb7a2f4..77c02ea5d3b4 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -978,7 +978,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, (void __user *)NULL, current); } - if (thread_flags & _TIF_SIGPENDING) + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) { diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h index f70382844b96..dd8913d57189 100644 --- a/arch/c6x/include/asm/thread_info.h +++ b/arch/c6x/include/asm/thread_info.h @@ -82,6 +82,7 @@ struct thread_info *current_thread_info(void) #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_MEMDIE 17 /* OOM killer killed process */ diff --git a/arch/c6x/kernel/asm-offsets.c b/arch/c6x/kernel/asm-offsets.c index 0f8fde494875..4a264ef87dcb 100644 --- a/arch/c6x/kernel/asm-offsets.c +++ b/arch/c6x/kernel/asm-offsets.c @@ -116,6 +116,7 @@ void foo(void) DEFINE(_TIF_NOTIFY_RESUME, (1< #include +#include #include #include @@ -313,7 +314,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags, int syscall) { /* deal with pending signal delivery */ - if (thread_info_flags & (1 << TIF_SIGPENDING)) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, syscall); if (thread_info_flags & (1 << TIF_NOTIFY_RESUME)) diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 64876e59e2ef..93372255984d 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -4,6 +4,5 @@ generic-y += gpio.h generic-y += kvm_para.h generic-y += local64.h generic-y += qrwlock.h -generic-y += seccomp.h generic-y += user.h generic-y += vmlinux.lds.h diff --git a/arch/csky/include/asm/seccomp.h b/arch/csky/include/asm/seccomp.h new file mode 100644 index 000000000000..d33e758126fb --- /dev/null +++ b/arch/csky/include/asm/seccomp.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include + +#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_CSKY +#define SECCOMP_ARCH_NATIVE_NR NR_syscalls +#define SECCOMP_ARCH_NATIVE_NAME "csky" + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/csky/include/asm/thread_info.h b/arch/csky/include/asm/thread_info.h index 68e7a1227170..21456a3737c2 100644 --- a/arch/csky/include/asm/thread_info.h +++ b/arch/csky/include/asm/thread_info.h @@ -64,6 +64,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ #define TIF_SYSCALL_TRACEPOINT 5 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 6 /* syscall auditing */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_POLLING_NRFLAG 16 /* poll_idle() is TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ @@ -75,6 +76,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_MEMDIE (1 << TIF_MEMDIE) @@ -82,7 +84,8 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_UPROBE) + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 8b068cf37447..37ea64ed3c12 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -257,7 +257,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, uprobe_notify_resume(regs); /* Handle pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) { diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 0cdaa302d3d2..a518214d4ddd 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -73,6 +73,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SYSCALL_TRACEPOINT 8 /* for ftrace syscall instrumentation */ #define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling TIF_NEED_RESCHED */ +#define TIF_NOTIFY_SIGNAL 10 /* signal notifications exist */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) @@ -83,6 +84,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) /* work to do in syscall trace */ #define _TIF_WORK_SYSCALL_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ @@ -92,7 +94,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \ _TIF_SINGLESTEP | _TIF_NOTIFY_RESUME | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOTIFY_SIGNAL) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \ diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 75d9b7e626b2..75a1c36b105a 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -279,7 +279,7 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) { - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h index 563da1986464..535976665bf0 100644 --- a/arch/hexagon/include/asm/thread_info.h +++ b/arch/hexagon/include/asm/thread_info.h @@ -95,6 +95,7 @@ register struct thread_info *__current_thread_info asm(QUOTED_THREADINFO_REG); #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* restore ss @ return to usr mode */ #define TIF_RESTORE_SIGMASK 6 /* restore sig mask in do_signal() */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* OOM killer killed process */ @@ -103,6 +104,7 @@ register struct thread_info *__current_thread_info asm(QUOTED_THREADINFO_REG); #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) /* work to do on interrupt/exception return - All but TIF_SYSCALL_TRACE */ #define _TIF_WORK_MASK (0x0000FFFF & ~_TIF_SYSCALL_TRACE) diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 67767c5ed98c..6a980cba7b29 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -174,7 +174,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) return 1; } - if (thread_info_flags & _TIF_SIGPENDING) { + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { do_signal(regs); return 1; } diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 64a1011f6812..51d20cb37706 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -103,6 +103,7 @@ struct thread_info { #define TIF_SYSCALL_TRACE 2 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notification exist */ #define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ @@ -115,6 +116,7 @@ struct thread_info { #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_MCA_INIT (1 << TIF_MCA_INIT) @@ -124,7 +126,7 @@ struct thread_info { /* "work to do on user-return" bits */ #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ - _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE) + _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_NOTIFY_SIGNAL) /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ #define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index bfb85d905f83..4ebbfa076a26 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -171,7 +171,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) } /* deal with pending signal delivery */ - if (test_thread_flag(TIF_SIGPENDING)) { + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) { local_irq_enable(); /* force interrupt enable */ ia64_do_signal(scr, in_syscall); } diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 3689c6718c88..15a757073fa5 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -60,6 +60,7 @@ static inline struct thread_info *current_thread_info(void) * bits 0-7 are tested at every exception exit * bits 8-15 are also tested at syscall exit */ +#define TIF_NOTIFY_SIGNAL 4 #define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ #define TIF_SIGPENDING 6 /* signal pending */ #define TIF_NEED_RESCHED 7 /* rescheduling necessary */ diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 46f91e0f6a08..349570f16a78 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -1133,7 +1133,8 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs) { - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || + test_thread_flag(TIF_SIGPENDING)) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 93d459f016fc..f82795592ce5 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -3,19 +3,17 @@ config MICROBLAZE def_bool y select ARCH_32BIT_OFF_T select ARCH_NO_SWAP - select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select ARCH_HAS_DMA_SET_UNCACHED if !MMU select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_TABLE_SORT select TIMER_OF select CLONE_BACKWARDS3 select COMMON_CLK - select DMA_DIRECT_REMAP if MMU + select DMA_DIRECT_REMAP select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_IDLE_POLL_SETUP @@ -44,7 +42,7 @@ config MICROBLAZE select TRACING_SUPPORT select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS - select MMU_GATHER_NO_RANGE if MMU + select MMU_GATHER_NO_RANGE select SPARSE_IRQ select SET_FS @@ -95,8 +93,7 @@ menu "Processor type and features" source "kernel/Kconfig.hz" config MMU - bool "MMU support" - default n + def_bool y comment "Boot options" @@ -142,18 +139,8 @@ config ADVANCED_OPTIONS comment "Default settings for advanced configuration options are used" depends on !ADVANCED_OPTIONS -config XILINX_UNCACHED_SHADOW - bool "Are you using uncached shadow for RAM ?" - depends on ADVANCED_OPTIONS && !MMU - default n - help - This is needed to be able to allocate uncachable memory regions. - The feature requires the design to define the RAM memory controller - window to be twice as large as the actual physical memory. - config HIGHMEM bool "High memory support" - depends on MMU select KMAP_LOCAL help The address space of Microblaze processors is only 4 Gigabytes large @@ -167,7 +154,7 @@ config HIGHMEM config LOWMEM_SIZE_BOOL bool "Set maximum low memory" - depends on ADVANCED_OPTIONS && MMU + depends on ADVANCED_OPTIONS help This option allows you to set the maximum amount of memory which will be used as "low memory", that is, memory which the kernel can @@ -205,12 +192,11 @@ config KERNEL_START_BOOL config KERNEL_START hex "Virtual address of kernel base" if KERNEL_START_BOOL - default "0xc0000000" if MMU - default KERNEL_BASE_ADDR if !MMU + default "0xc0000000" config TASK_SIZE_BOOL bool "Set custom user task size" - depends on ADVANCED_OPTIONS && MMU + depends on ADVANCED_OPTIONS help This option allows you to set the amount of virtual address space allocated to user tasks. This can be useful in optimizing the @@ -222,33 +208,6 @@ config TASK_SIZE hex "Size of user task space" if TASK_SIZE_BOOL default "0x80000000" -choice - prompt "Page size" - default MICROBLAZE_4K_PAGES - depends on ADVANCED_OPTIONS && !MMU - help - Select the kernel logical page size. Increasing the page size - will reduce software overhead at each page boundary, allow - hardware prefetch mechanisms to be more effective, and allow - larger dma transfers increasing IO efficiency and reducing - overhead. However the utilization of memory will increase. - For example, each cached file will using a multiple of the - page size to hold its contents and the difference between the - end of file and the end of page is wasted. - - If unsure, choose 4K_PAGES. - -config MICROBLAZE_4K_PAGES - bool "4k page size" - -config MICROBLAZE_16K_PAGES - bool "16k page size" - -config MICROBLAZE_64K_PAGES - bool "64k page size" - -endchoice - endmenu menu "Bus Options" diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 7b340a35b194..bb980891816d 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -1,11 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 KBUILD_DEFCONFIG := mmu_defconfig -ifeq ($(CONFIG_MMU),y) UTS_SYSNAME = -DUTS_SYSNAME=\"Linux\" -else -UTS_SYSNAME = -DUTS_SYSNAME=\"uClinux\" -endif # What CPU vesion are we building for, and crack it open # as major.minor.rev @@ -67,12 +63,7 @@ DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS))) core-y += $(boot)/dts/ -# defines filename extension depending memory management type -ifeq ($(CONFIG_MMU),) -MMU := -nommu -endif - -export MMU DTB +export DTB all: linux.bin diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig index 9b8a50f30662..51337fffb947 100644 --- a/arch/microblaze/configs/mmu_defconfig +++ b/arch/microblaze/configs/mmu_defconfig @@ -16,7 +16,6 @@ CONFIG_XILINX_MICROBLAZE0_USE_DIV=1 CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL=2 CONFIG_XILINX_MICROBLAZE0_USE_FPU=2 CONFIG_HZ_100=y -CONFIG_MMU=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE_FORCE=y CONFIG_HIGHMEM=y diff --git a/arch/microblaze/configs/nommu_defconfig b/arch/microblaze/configs/nommu_defconfig deleted file mode 100644 index 8c420782d6e4..000000000000 --- a/arch/microblaze/configs/nommu_defconfig +++ /dev/null @@ -1,90 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y -# CONFIG_BASE_FULL is not set -CONFIG_KALLSYMS_ALL=y -CONFIG_EMBEDDED=y -CONFIG_SLAB=y -CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR=1 -CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR=1 -CONFIG_XILINX_MICROBLAZE0_USE_BARREL=1 -CONFIG_XILINX_MICROBLAZE0_USE_DIV=1 -CONFIG_XILINX_MICROBLAZE0_USE_HW_MUL=2 -CONFIG_XILINX_MICROBLAZE0_USE_FPU=2 -CONFIG_HZ_100=y -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE_FORCE=y -CONFIG_PCI_XILINX=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -# CONFIG_EFI_PARTITION is not set -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IPV6 is not set -CONFIG_PCI=y -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_RAM=y -CONFIG_MTD_UCLINUX=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_NETDEVICES=y -CONFIG_XILINX_EMACLITE=y -CONFIG_XILINX_LL_TEMAC=y -# CONFIG_INPUT is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y -CONFIG_SERIAL_UARTLITE=y -CONFIG_SERIAL_UARTLITE_CONSOLE=y -# CONFIG_HW_RANDOM is not set -CONFIG_XILINX_HWICAP=y -CONFIG_I2C=y -CONFIG_I2C_XILINX=y -CONFIG_SPI=y -CONFIG_SPI_XILINX=y -CONFIG_GPIOLIB=y -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_XILINX=y -CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_GPIO_RESTART=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_XILINX_WATCHDOG=y -CONFIG_FB=y -CONFIG_FB_XILINX=y -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT3_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_CRAMFS=y -CONFIG_ROMFS_FS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3_ACL=y -CONFIG_NLS=y -CONFIG_KEYS=y -CONFIG_ENCRYPTED_KEYS=y -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_ARC4=y -CONFIG_CRYPTO_DES=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_SLAB=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_SPINLOCK=y diff --git a/arch/microblaze/include/asm/dma.h b/arch/microblaze/include/asm/dma.h index e6cb6d0725af..f801582be912 100644 --- a/arch/microblaze/include/asm/dma.h +++ b/arch/microblaze/include/asm/dma.h @@ -6,14 +6,8 @@ #ifndef _ASM_MICROBLAZE_DMA_H #define _ASM_MICROBLAZE_DMA_H -#ifndef CONFIG_MMU -/* we don't have dma address limit. define it as zero to be - * unlimited. */ -#define MAX_DMA_ADDRESS (0) -#else /* Virtual address corresponding to last available physical memory address. */ #define MAX_DMA_ADDRESS (CONFIG_KERNEL_START + memory_size - 1) -#endif #ifdef CONFIG_PCI extern int isa_dma_bridge_buggy; diff --git a/arch/microblaze/include/asm/exceptions.h b/arch/microblaze/include/asm/exceptions.h index d67e65b72215..967f175173e1 100644 --- a/arch/microblaze/include/asm/exceptions.h +++ b/arch/microblaze/include/asm/exceptions.h @@ -11,11 +11,6 @@ #define _ASM_MICROBLAZE_EXCEPTIONS_H #ifdef __KERNEL__ - -#ifndef CONFIG_MMU -#define EX_HANDLER_STACK_SIZ (4*19) -#endif - #ifndef __ASSEMBLY__ /* Macros to enable and disable HW exceptions in the MSR */ diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index 1dd6fae41897..b6a57f8468f0 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -30,15 +30,12 @@ extern resource_size_t isa_mem_base; #define PCI_IOBASE ((void __iomem *)_IO_BASE) #define IO_SPACE_LIMIT (0xFFFFFFFF) -#ifdef CONFIG_MMU #define page_to_bus(page) (page_to_phys(page)) extern void iounmap(volatile void __iomem *addr); extern void __iomem *ioremap(phys_addr_t address, unsigned long size); -#endif /* CONFIG_MMU */ - /* Big Endian */ #define out_be32(a, v) __raw_writel((v), (void __iomem __force *)(a)) #define out_be16(a, v) __raw_writew((v), (a)) diff --git a/arch/microblaze/include/asm/mmu.h b/arch/microblaze/include/asm/mmu.h index 97f1243101cc..b928a87c0076 100644 --- a/arch/microblaze/include/asm/mmu.h +++ b/arch/microblaze/include/asm/mmu.h @@ -8,9 +8,6 @@ #ifndef _ASM_MICROBLAZE_MMU_H #define _ASM_MICROBLAZE_MMU_H -# ifndef CONFIG_MMU -# include -# else /* CONFIG_MMU */ # ifdef __KERNEL__ # ifndef __ASSEMBLY__ @@ -119,5 +116,4 @@ extern u32 tlb_skip; # define TLB_G 0x00000001 /* Memory is guarded from prefetch */ # endif /* __KERNEL__ */ -# endif /* CONFIG_MMU */ #endif /* _ASM_MICROBLAZE_MMU_H */ diff --git a/arch/microblaze/include/asm/mmu_context.h b/arch/microblaze/include/asm/mmu_context.h index 34004efb3def..866e52da5eb9 100644 --- a/arch/microblaze/include/asm/mmu_context.h +++ b/arch/microblaze/include/asm/mmu_context.h @@ -1,6 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_MMU # include -#else -# include -#endif diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index b13463d39b38..bf681f272f72 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -20,13 +20,7 @@ #ifdef __KERNEL__ /* PAGE_SHIFT determines the page size */ -#if defined(CONFIG_MICROBLAZE_64K_PAGES) -#define PAGE_SHIFT 16 -#elif defined(CONFIG_MICROBLAZE_16K_PAGES) -#define PAGE_SHIFT 14 -#else #define PAGE_SHIFT 12 -#endif #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) @@ -44,17 +38,6 @@ #define PAGE_UP(addr) (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1))) #define PAGE_DOWN(addr) ((addr)&(~((PAGE_SIZE)-1))) -#ifndef CONFIG_MMU -/* - * PAGE_OFFSET -- the first address of the first page of memory. When not - * using MMU this corresponds to the first free page in physical memory (aligned - * on a page boundary). - */ -extern unsigned int __page_offset; -#define PAGE_OFFSET __page_offset - -#else /* CONFIG_MMU */ - /* * PAGE_OFFSET -- the first address of the first page of memory. With MMU * it is set to the kernel start address (aligned on a page boundary). @@ -70,8 +53,6 @@ extern unsigned int __page_offset; typedef unsigned long pte_basic_t; #define PTE_FMT "%.8lx" -#endif /* CONFIG_MMU */ - # define copy_page(to, from) memcpy((to), (from), PAGE_SIZE) # define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE) @@ -86,25 +67,12 @@ typedef struct page *pgtable_t; typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pgprot; } pgprot_t; /* FIXME this can depend on linux kernel version */ -# ifdef CONFIG_MMU typedef struct { unsigned long pgd; } pgd_t; -# else /* CONFIG_MMU */ -typedef struct { unsigned long ste[64]; } pmd_t; -typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t p4e[1]; } p4d_t; -typedef struct { p4d_t pge[1]; } pgd_t; -# endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) # define pgprot_val(x) ((x).pgprot) -# ifdef CONFIG_MMU # define pgd_val(x) ((x).pgd) -# else /* CONFIG_MMU */ -# define pmd_val(x) ((x).ste[0]) -# define pud_val(x) ((x).pue[0]) -# define pgd_val(x) ((x).pge[0]) -# endif /* CONFIG_MMU */ # define __pte(x) ((pte_t) { (x) }) # define __pgd(x) ((pgd_t) { (x) }) @@ -142,28 +110,12 @@ extern int page_is_ram(unsigned long pfn); # define virt_to_pfn(vaddr) (phys_to_pfn((__pa(vaddr)))) # define pfn_to_virt(pfn) __va(pfn_to_phys((pfn))) -# ifdef CONFIG_MMU - # define virt_to_page(kaddr) (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)) # define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT) # define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) -# else /* CONFIG_MMU */ -# define virt_to_page(vaddr) (pfn_to_page(virt_to_pfn(vaddr))) -# define page_to_virt(page) (pfn_to_virt(page_to_pfn(page))) -# define page_to_phys(page) (pfn_to_phys(page_to_pfn(page))) -# define page_to_bus(page) (page_to_phys(page)) -# define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr))) -# endif /* CONFIG_MMU */ - -# ifndef CONFIG_MMU -# define pfn_valid(pfn) (((pfn) >= min_low_pfn) && \ - ((pfn) <= (min_low_pfn + max_mapnr))) -# define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) -# else /* CONFIG_MMU */ # define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT) # define pfn_valid(pfn) ((pfn) < (max_mapnr + ARCH_PFN_OFFSET)) -# endif /* CONFIG_MMU */ # endif /* __ASSEMBLY__ */ @@ -174,12 +126,6 @@ extern int page_is_ram(unsigned long pfn); /* Convert between virtual and physical address for MMU. */ /* Handle MicroBlaze processor with virtual memory. */ -#ifndef CONFIG_MMU -#define __virt_to_phys(addr) addr -#define __phys_to_virt(addr) addr -#define tophys(rd, rs) addik rd, rs, 0 -#define tovirt(rd, rs) addik rd, rs, 0 -#else #define __virt_to_phys(addr) \ ((addr) + CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START) #define __phys_to_virt(addr) \ @@ -188,14 +134,9 @@ extern int page_is_ram(unsigned long pfn); addik rd, rs, (CONFIG_KERNEL_BASE_ADDR - CONFIG_KERNEL_START) #define tovirt(rd, rs) \ addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR) -#endif /* CONFIG_MMU */ #define TOPHYS(addr) __virt_to_phys(addr) -#ifdef CONFIG_MMU - -#endif /* CONFIG_MMU */ - #endif /* __KERNEL__ */ #include diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 8839ce00ea05..d56b9f670ad1 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -8,8 +8,6 @@ #ifndef _ASM_MICROBLAZE_PGALLOC_H #define _ASM_MICROBLAZE_PGALLOC_H -#ifdef CONFIG_MMU - #include /* For min/max macros */ #include #include @@ -42,6 +40,4 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); #define pmd_populate_kernel(mm, pmd, pte) \ (pmd_val(*(pmd)) = (unsigned long) (pte)) -#endif /* CONFIG_MMU */ - #endif /* _ASM_MICROBLAZE_PGALLOC_H */ diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 3fa1df90925e..9ae8d2c17dd5 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -14,47 +14,6 @@ extern int mem_init_done; #endif -#ifndef CONFIG_MMU - -#define pgd_present(pgd) (1) /* pages are always present on non MMU */ -#define pgd_none(pgd) (0) -#define pgd_bad(pgd) (0) -#define pgd_clear(pgdp) -#define kern_addr_valid(addr) (1) - -#define PAGE_NONE __pgprot(0) /* these mean nothing to non MMU */ -#define PAGE_SHARED __pgprot(0) /* these mean nothing to non MMU */ -#define PAGE_COPY __pgprot(0) /* these mean nothing to non MMU */ -#define PAGE_READONLY __pgprot(0) /* these mean nothing to non MMU */ -#define PAGE_KERNEL __pgprot(0) /* these mean nothing to non MMU */ - -#define pgprot_noncached(x) (x) -#define pgprot_writecombine pgprot_noncached -#define pgprot_device pgprot_noncached - -#define __swp_type(x) (0) -#define __swp_offset(x) (0) -#define __swp_entry(typ, off) ((swp_entry_t) { ((typ) | ((off) << 7)) }) -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) - -#define ZERO_PAGE(vaddr) ({ BUG(); NULL; }) - -#define swapper_pg_dir ((pgd_t *) NULL) - -#define arch_enter_lazy_cpu_mode() do {} while (0) - -#define pgprot_noncached_wc(prot) prot - -/* - * All 32bit addresses are effectively valid for vmalloc... - * Sort of meaningless for non-VM targets. - */ -#define VMALLOC_START 0 -#define VMALLOC_END 0xffffffff - -#else /* CONFIG_MMU */ - #include #ifdef __KERNEL__ @@ -491,8 +450,6 @@ void __init *early_get_page(void); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif /* CONFIG_MMU */ - #ifndef __ASSEMBLY__ extern unsigned long ioremap_bot, ioremap_base; diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 616211871a6e..06c6e493590a 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -31,42 +31,6 @@ extern void ret_from_kernel_thread(void); # endif /* __ASSEMBLY__ */ -# ifndef CONFIG_MMU -/* - * User space process size: memory size - * - * TASK_SIZE on MMU cpu is usually 1GB. However, on no-MMU arch, both - * user processes and the kernel is on the same memory region. They - * both share the memory space and that is limited by the amount of - * physical memory. thus, we set TASK_SIZE == amount of total memory. - */ -# define TASK_SIZE (0x81000000 - 0x80000000) - -/* - * This decides where the kernel will search for a free chunk of vm - * space during mmap's. We won't be using it - */ -# define TASK_UNMAPPED_BASE 0 - -/* definition in include/linux/sched.h */ -struct task_struct; - -/* thread_struct is gone. use thread_info instead. */ -struct thread_struct { }; -# define INIT_THREAD { } - -/* Free all resources held by a thread. */ -static inline void release_thread(struct task_struct *dead_task) -{ -} - -extern unsigned long get_wchan(struct task_struct *p); - -# define KSTK_EIP(tsk) (0) -# define KSTK_ESP(tsk) (0) - -# else /* CONFIG_MMU */ - /* * This is used to define STACK_TOP, and with MMU it must be below * kernel base to select the correct PGD when handling MMU exceptions. @@ -130,5 +94,4 @@ extern struct dentry *of_debugfs_root; #endif # endif /* __ASSEMBLY__ */ -# endif /* CONFIG_MMU */ #endif /* _ASM_MICROBLAZE_PROCESSOR_H */ diff --git a/arch/microblaze/include/asm/registers.h b/arch/microblaze/include/asm/registers.h index ee81e1cba008..6b36693fc621 100644 --- a/arch/microblaze/include/asm/registers.h +++ b/arch/microblaze/include/asm/registers.h @@ -27,7 +27,6 @@ #define FSR_UF (1<<1) /* Underflow */ #define FSR_DO (1<<0) /* Denormalized operand error */ -# ifdef CONFIG_MMU /* Machine State Register (MSR) Fields */ # define MSR_UM (1<<11) /* User Mode */ # define MSR_UMS (1<<12) /* User Mode Save */ @@ -43,5 +42,4 @@ # define ESR_DIZ (1<<11) /* Zone Protection */ # define ESR_S (1<<10) /* Store instruction */ -# endif /* CONFIG_MMU */ #endif /* _ASM_MICROBLAZE_REGISTERS_H */ diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index be10da9d87cb..a06cc1f97aa9 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -14,9 +14,7 @@ extern char cmd_line[COMMAND_LINE_SIZE]; extern char *klimit; -# ifdef CONFIG_MMU extern void mmu_reset(void); -# endif /* CONFIG_MMU */ void time_init(void); void init_IRQ(void); diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index ad8e8fcb90d3..44f5ca331862 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -107,6 +107,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ /* restore singlestep on return to user mode */ #define TIF_SINGLESTEP 4 +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_MEMDIE 6 /* is terminating due to OOM killer */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ @@ -119,6 +120,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) diff --git a/arch/microblaze/include/asm/tlbflush.h b/arch/microblaze/include/asm/tlbflush.h index 1200e2bf14bb..2038168ed128 100644 --- a/arch/microblaze/include/asm/tlbflush.h +++ b/arch/microblaze/include/asm/tlbflush.h @@ -8,8 +8,6 @@ #ifndef _ASM_MICROBLAZE_TLBFLUSH_H #define _ASM_MICROBLAZE_TLBFLUSH_H -#ifdef CONFIG_MMU - #include #include #include /* For TASK_SIZE */ @@ -50,16 +48,4 @@ static inline void local_flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long end) { } -#else /* CONFIG_MMU */ - -#define flush_tlb() BUG() -#define flush_tlb_all() BUG() -#define flush_tlb_mm(mm) BUG() -#define flush_tlb_page(vma, addr) BUG() -#define flush_tlb_range(mm, start, end) BUG() -#define flush_tlb_pgtables(mm, start, end) BUG() -#define flush_tlb_kernel_range(start, end) BUG() - -#endif /* CONFIG_MMU */ - #endif /* _ASM_MICROBLAZE_TLBFLUSH_H */ diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 304b04ffea2f..c44b59470e45 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -30,35 +30,14 @@ */ # define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) -# ifndef CONFIG_MMU -# define KERNEL_DS MAKE_MM_SEG(0) -# define USER_DS KERNEL_DS -# else # define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) # define USER_DS MAKE_MM_SEG(TASK_SIZE - 1) -# endif # define get_fs() (current_thread_info()->addr_limit) # define set_fs(val) (current_thread_info()->addr_limit = (val)) # define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg) -#ifndef CONFIG_MMU - -/* Check against bounds of physical memory */ -static inline int ___range_ok(unsigned long addr, unsigned long size) -{ - return ((addr < memory_start) || - ((addr + size - 1) > (memory_start + memory_size - 1))); -} - -#define __range_ok(addr, size) \ - ___range_ok((unsigned long)(addr), (unsigned long)(size)) - -#define access_ok(addr, size) (__range_ok((addr), (size)) == 0) - -#else - static inline int access_ok(const void __user *addr, unsigned long size) { if (!size) @@ -77,15 +56,9 @@ static inline int access_ok(const void __user *addr, unsigned long size) (u32)get_fs().seg); return 1; } -#endif -#ifdef CONFIG_MMU # define __FIXUP_SECTION ".section .fixup,\"ax\"\n" # define __EX_TABLE_SECTION ".section __ex_table,\"a\"\n" -#else -# define __FIXUP_SECTION ".section .discard,\"ax\"\n" -# define __EX_TABLE_SECTION ".section .discard,\"ax\"\n" -#endif extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index dd71637437f4..15a20eb814ce 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -22,9 +22,9 @@ obj-y += dma.o exceptions.o \ obj-y += cpu/ obj-$(CONFIG_MODULES) += microblaze_ksyms.o module.o -obj-$(CONFIG_MMU) += misc.o +obj-y += misc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o mcount.o obj-$(CONFIG_KGDB) += kgdb.o -obj-y += entry$(MMU).o +obj-y += entry.o diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c index c1b459c97571..6c69ce7be2e8 100644 --- a/arch/microblaze/kernel/asm-offsets.c +++ b/arch/microblaze/kernel/asm-offsets.c @@ -70,7 +70,6 @@ int main(int argc, char *argv[]) /* struct task_struct */ DEFINE(TS_THREAD_INFO, offsetof(struct task_struct, stack)); -#ifdef CONFIG_MMU DEFINE(TASK_STATE, offsetof(struct task_struct, state)); DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags)); DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace)); @@ -84,7 +83,6 @@ int main(int argc, char *argv[]) DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); BLANK(); -#endif /* struct thread_info */ DEFINE(TI_TASK, offsetof(struct thread_info, task)); diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S deleted file mode 100644 index 7e394fc2c439..000000000000 --- a/arch/microblaze/kernel/entry-nommu.S +++ /dev/null @@ -1,622 +0,0 @@ -/* - * Copyright (C) 2007-2009 Michal Simek - * Copyright (C) 2007-2009 PetaLogix - * Copyright (C) 2006 Atmark Techno, Inc. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR - .macro disable_irq - msrclr r0, MSR_IE - .endm - - .macro enable_irq - msrset r0, MSR_IE - .endm - - .macro clear_bip - msrclr r0, MSR_BIP - .endm -#else - .macro disable_irq - mfs r11, rmsr - andi r11, r11, ~MSR_IE - mts rmsr, r11 - .endm - - .macro enable_irq - mfs r11, rmsr - ori r11, r11, MSR_IE - mts rmsr, r11 - .endm - - .macro clear_bip - mfs r11, rmsr - andi r11, r11, ~MSR_BIP - mts rmsr, r11 - .endm -#endif - -ENTRY(_interrupt) - swi r1, r0, PER_CPU(ENTRY_SP) /* save the current sp */ - swi r11, r0, PER_CPU(R11_SAVE) /* temporarily save r11 */ - lwi r11, r0, PER_CPU(KM) /* load mode indicator */ - beqid r11, 1f - nop - brid 2f /* jump over */ - addik r1, r1, (-PT_SIZE) /* room for pt_regs (delay slot) */ -1: /* switch to kernel stack */ - lwi r1, r0, PER_CPU(CURRENT_SAVE) /* get the saved current */ - lwi r1, r1, TS_THREAD_INFO /* get the thread info */ - /* calculate kernel stack pointer */ - addik r1, r1, THREAD_SIZE - PT_SIZE -2: - swi r11, r1, PT_MODE /* store the mode */ - lwi r11, r0, PER_CPU(R11_SAVE) /* reload r11 */ - swi r2, r1, PT_R2 - swi r3, r1, PT_R3 - swi r4, r1, PT_R4 - swi r5, r1, PT_R5 - swi r6, r1, PT_R6 - swi r7, r1, PT_R7 - swi r8, r1, PT_R8 - swi r9, r1, PT_R9 - swi r10, r1, PT_R10 - swi r11, r1, PT_R11 - swi r12, r1, PT_R12 - swi r13, r1, PT_R13 - swi r14, r1, PT_R14 - swi r14, r1, PT_PC - swi r15, r1, PT_R15 - swi r16, r1, PT_R16 - swi r17, r1, PT_R17 - swi r18, r1, PT_R18 - swi r19, r1, PT_R19 - swi r20, r1, PT_R20 - swi r21, r1, PT_R21 - swi r22, r1, PT_R22 - swi r23, r1, PT_R23 - swi r24, r1, PT_R24 - swi r25, r1, PT_R25 - swi r26, r1, PT_R26 - swi r27, r1, PT_R27 - swi r28, r1, PT_R28 - swi r29, r1, PT_R29 - swi r30, r1, PT_R30 - swi r31, r1, PT_R31 - /* special purpose registers */ - mfs r11, rmsr - swi r11, r1, PT_MSR - mfs r11, rear - swi r11, r1, PT_EAR - mfs r11, resr - swi r11, r1, PT_ESR - mfs r11, rfsr - swi r11, r1, PT_FSR - /* reload original stack pointer and save it */ - lwi r11, r0, PER_CPU(ENTRY_SP) - swi r11, r1, PT_R1 - /* update mode indicator we are in kernel mode */ - addik r11, r0, 1 - swi r11, r0, PER_CPU(KM) - /* restore r31 */ - lwi r31, r0, PER_CPU(CURRENT_SAVE) - /* prepare the link register, the argument and jump */ - addik r15, r0, ret_from_intr - 8 - addk r6, r0, r15 - braid do_IRQ - add r5, r0, r1 - -ret_from_intr: - lwi r11, r1, PT_MODE - bneid r11, no_intr_resched - -3: - lwi r6, r31, TS_THREAD_INFO /* get thread info */ - lwi r19, r6, TI_FLAGS /* get flags in thread info */ - /* do an extra work if any bits are set */ - - andi r11, r19, _TIF_NEED_RESCHED - beqi r11, 1f - bralid r15, schedule - nop - bri 3b -1: andi r11, r19, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME - beqid r11, no_intr_resched - addk r5, r1, r0 - bralid r15, do_notify_resume - addk r6, r0, r0 - bri 3b - -no_intr_resched: - /* Disable interrupts, we are now committed to the state restore */ - disable_irq - - /* save mode indicator */ - lwi r11, r1, PT_MODE - swi r11, r0, PER_CPU(KM) - - /* save r31 */ - swi r31, r0, PER_CPU(CURRENT_SAVE) -restore_context: - /* special purpose registers */ - lwi r11, r1, PT_FSR - mts rfsr, r11 - lwi r11, r1, PT_ESR - mts resr, r11 - lwi r11, r1, PT_EAR - mts rear, r11 - lwi r11, r1, PT_MSR - mts rmsr, r11 - - lwi r31, r1, PT_R31 - lwi r30, r1, PT_R30 - lwi r29, r1, PT_R29 - lwi r28, r1, PT_R28 - lwi r27, r1, PT_R27 - lwi r26, r1, PT_R26 - lwi r25, r1, PT_R25 - lwi r24, r1, PT_R24 - lwi r23, r1, PT_R23 - lwi r22, r1, PT_R22 - lwi r21, r1, PT_R21 - lwi r20, r1, PT_R20 - lwi r19, r1, PT_R19 - lwi r18, r1, PT_R18 - lwi r17, r1, PT_R17 - lwi r16, r1, PT_R16 - lwi r15, r1, PT_R15 - lwi r14, r1, PT_PC - lwi r13, r1, PT_R13 - lwi r12, r1, PT_R12 - lwi r11, r1, PT_R11 - lwi r10, r1, PT_R10 - lwi r9, r1, PT_R9 - lwi r8, r1, PT_R8 - lwi r7, r1, PT_R7 - lwi r6, r1, PT_R6 - lwi r5, r1, PT_R5 - lwi r4, r1, PT_R4 - lwi r3, r1, PT_R3 - lwi r2, r1, PT_R2 - lwi r1, r1, PT_R1 - rtid r14, 0 - nop - -ENTRY(_reset) - brai 0; - -ENTRY(_user_exception) - swi r1, r0, PER_CPU(ENTRY_SP) /* save the current sp */ - swi r11, r0, PER_CPU(R11_SAVE) /* temporarily save r11 */ - lwi r11, r0, PER_CPU(KM) /* load mode indicator */ - beqid r11, 1f /* Already in kernel mode? */ - nop - brid 2f /* jump over */ - addik r1, r1, (-PT_SIZE) /* Room for pt_regs (delay slot) */ -1: /* Switch to kernel stack */ - lwi r1, r0, PER_CPU(CURRENT_SAVE) /* get the saved current */ - lwi r1, r1, TS_THREAD_INFO /* get the thread info */ - /* calculate kernel stack pointer */ - addik r1, r1, THREAD_SIZE - PT_SIZE -2: - swi r11, r1, PT_MODE /* store the mode */ - lwi r11, r0, PER_CPU(R11_SAVE) /* reload r11 */ - /* save them on stack */ - swi r2, r1, PT_R2 - swi r3, r1, PT_R3 /* r3: _always_ in clobber list; see unistd.h */ - swi r4, r1, PT_R4 /* r4: _always_ in clobber list; see unistd.h */ - swi r5, r1, PT_R5 - swi r6, r1, PT_R6 - swi r7, r1, PT_R7 - swi r8, r1, PT_R8 - swi r9, r1, PT_R9 - swi r10, r1, PT_R10 - swi r11, r1, PT_R11 - /* r12: _always_ in clobber list; see unistd.h */ - swi r12, r1, PT_R12 - swi r13, r1, PT_R13 - /* r14: _always_ in clobber list; see unistd.h */ - swi r14, r1, PT_R14 - /* but we want to return to the next inst. */ - addik r14, r14, 0x4 - swi r14, r1, PT_PC /* increment by 4 and store in pc */ - swi r15, r1, PT_R15 - swi r16, r1, PT_R16 - swi r17, r1, PT_R17 - swi r18, r1, PT_R18 - swi r19, r1, PT_R19 - swi r20, r1, PT_R20 - swi r21, r1, PT_R21 - swi r22, r1, PT_R22 - swi r23, r1, PT_R23 - swi r24, r1, PT_R24 - swi r25, r1, PT_R25 - swi r26, r1, PT_R26 - swi r27, r1, PT_R27 - swi r28, r1, PT_R28 - swi r29, r1, PT_R29 - swi r30, r1, PT_R30 - swi r31, r1, PT_R31 - - disable_irq - nop /* make sure IE bit is in effect */ - clear_bip /* once IE is in effect it is safe to clear BIP */ - nop - - /* special purpose registers */ - mfs r11, rmsr - swi r11, r1, PT_MSR - mfs r11, rear - swi r11, r1, PT_EAR - mfs r11, resr - swi r11, r1, PT_ESR - mfs r11, rfsr - swi r11, r1, PT_FSR - /* reload original stack pointer and save it */ - lwi r11, r0, PER_CPU(ENTRY_SP) - swi r11, r1, PT_R1 - /* update mode indicator we are in kernel mode */ - addik r11, r0, 1 - swi r11, r0, PER_CPU(KM) - /* restore r31 */ - lwi r31, r0, PER_CPU(CURRENT_SAVE) - /* re-enable interrupts now we are in kernel mode */ - enable_irq - - /* See if the system call number is valid. */ - addi r11, r12, -__NR_syscalls - bgei r11, 1f /* return to user if not valid */ - /* Figure out which function to use for this system call. */ - /* Note Microblaze barrel shift is optional, so don't rely on it */ - add r12, r12, r12 /* convert num -> ptr */ - addik r30, r0, 1 /* restarts allowed */ - add r12, r12, r12 - lwi r12, r12, sys_call_table /* Get function pointer */ - addik r15, r0, ret_to_user-8 /* set return address */ - bra r12 /* Make the system call. */ - bri 0 /* won't reach here */ -1: - brid ret_to_user /* jump to syscall epilogue */ - addi r3, r0, -ENOSYS /* set errno in delay slot */ - -/* - * Debug traps are like a system call, but entered via brki r14, 0x60 - * All we need to do is send the SIGTRAP signal to current, ptrace and - * do_notify_resume will handle the rest - */ -ENTRY(_debug_exception) - swi r1, r0, PER_CPU(ENTRY_SP) /* save the current sp */ - lwi r1, r0, PER_CPU(CURRENT_SAVE) /* get the saved current */ - lwi r1, r1, TS_THREAD_INFO /* get the thread info */ - addik r1, r1, THREAD_SIZE - PT_SIZE /* get the kernel stack */ - swi r11, r0, PER_CPU(R11_SAVE) /* temporarily save r11 */ - lwi r11, r0, PER_CPU(KM) /* load mode indicator */ -//save_context: - swi r11, r1, PT_MODE /* store the mode */ - lwi r11, r0, PER_CPU(R11_SAVE) /* reload r11 */ - /* save them on stack */ - swi r2, r1, PT_R2 - swi r3, r1, PT_R3 /* r3: _always_ in clobber list; see unistd.h */ - swi r4, r1, PT_R4 /* r4: _always_ in clobber list; see unistd.h */ - swi r5, r1, PT_R5 - swi r6, r1, PT_R6 - swi r7, r1, PT_R7 - swi r8, r1, PT_R8 - swi r9, r1, PT_R9 - swi r10, r1, PT_R10 - swi r11, r1, PT_R11 - /* r12: _always_ in clobber list; see unistd.h */ - swi r12, r1, PT_R12 - swi r13, r1, PT_R13 - /* r14: _always_ in clobber list; see unistd.h */ - swi r14, r1, PT_R14 - swi r14, r1, PT_PC /* Will return to interrupted instruction */ - swi r15, r1, PT_R15 - swi r16, r1, PT_R16 - swi r17, r1, PT_R17 - swi r18, r1, PT_R18 - swi r19, r1, PT_R19 - swi r20, r1, PT_R20 - swi r21, r1, PT_R21 - swi r22, r1, PT_R22 - swi r23, r1, PT_R23 - swi r24, r1, PT_R24 - swi r25, r1, PT_R25 - swi r26, r1, PT_R26 - swi r27, r1, PT_R27 - swi r28, r1, PT_R28 - swi r29, r1, PT_R29 - swi r30, r1, PT_R30 - swi r31, r1, PT_R31 - - disable_irq - nop /* make sure IE bit is in effect */ - clear_bip /* once IE is in effect it is safe to clear BIP */ - nop - - /* special purpose registers */ - mfs r11, rmsr - swi r11, r1, PT_MSR - mfs r11, rear - swi r11, r1, PT_EAR - mfs r11, resr - swi r11, r1, PT_ESR - mfs r11, rfsr - swi r11, r1, PT_FSR - /* reload original stack pointer and save it */ - lwi r11, r0, PER_CPU(ENTRY_SP) - swi r11, r1, PT_R1 - /* update mode indicator we are in kernel mode */ - addik r11, r0, 1 - swi r11, r0, PER_CPU(KM) - /* restore r31 */ - lwi r31, r0, PER_CPU(CURRENT_SAVE) - /* re-enable interrupts now we are in kernel mode */ - enable_irq - - addi r5, r0, SIGTRAP /* sending the trap signal */ - add r6, r0, r31 /* to current */ - bralid r15, send_sig - add r7, r0, r0 /* 3rd param zero */ - - addik r30, r0, 1 /* restarts allowed ??? */ - /* Restore r3/r4 to work around how ret_to_user works */ - lwi r3, r1, PT_R3 - lwi r4, r1, PT_R4 - bri ret_to_user - -ENTRY(_break) - bri 0 - -/* struct task_struct *_switch_to(struct thread_info *prev, - struct thread_info *next); */ -ENTRY(_switch_to) - /* prepare return value */ - addk r3, r0, r31 - - /* save registers in cpu_context */ - /* use r11 and r12, volatile registers, as temp register */ - addik r11, r5, TI_CPU_CONTEXT - swi r1, r11, CC_R1 - swi r2, r11, CC_R2 - /* skip volatile registers. - * they are saved on stack when we jumped to _switch_to() */ - /* dedicated registers */ - swi r13, r11, CC_R13 - swi r14, r11, CC_R14 - swi r15, r11, CC_R15 - swi r16, r11, CC_R16 - swi r17, r11, CC_R17 - swi r18, r11, CC_R18 - /* save non-volatile registers */ - swi r19, r11, CC_R19 - swi r20, r11, CC_R20 - swi r21, r11, CC_R21 - swi r22, r11, CC_R22 - swi r23, r11, CC_R23 - swi r24, r11, CC_R24 - swi r25, r11, CC_R25 - swi r26, r11, CC_R26 - swi r27, r11, CC_R27 - swi r28, r11, CC_R28 - swi r29, r11, CC_R29 - swi r30, r11, CC_R30 - /* special purpose registers */ - mfs r12, rmsr - swi r12, r11, CC_MSR - mfs r12, rear - swi r12, r11, CC_EAR - mfs r12, resr - swi r12, r11, CC_ESR - mfs r12, rfsr - swi r12, r11, CC_FSR - - /* update r31, the current */ - lwi r31, r6, TI_TASK - swi r31, r0, PER_CPU(CURRENT_SAVE) - - /* get new process' cpu context and restore */ - addik r11, r6, TI_CPU_CONTEXT - - /* special purpose registers */ - lwi r12, r11, CC_FSR - mts rfsr, r12 - lwi r12, r11, CC_ESR - mts resr, r12 - lwi r12, r11, CC_EAR - mts rear, r12 - lwi r12, r11, CC_MSR - mts rmsr, r12 - /* non-volatile registers */ - lwi r30, r11, CC_R30 - lwi r29, r11, CC_R29 - lwi r28, r11, CC_R28 - lwi r27, r11, CC_R27 - lwi r26, r11, CC_R26 - lwi r25, r11, CC_R25 - lwi r24, r11, CC_R24 - lwi r23, r11, CC_R23 - lwi r22, r11, CC_R22 - lwi r21, r11, CC_R21 - lwi r20, r11, CC_R20 - lwi r19, r11, CC_R19 - /* dedicated registers */ - lwi r18, r11, CC_R18 - lwi r17, r11, CC_R17 - lwi r16, r11, CC_R16 - lwi r15, r11, CC_R15 - lwi r14, r11, CC_R14 - lwi r13, r11, CC_R13 - /* skip volatile registers */ - lwi r2, r11, CC_R2 - lwi r1, r11, CC_R1 - - rtsd r15, 8 - nop - -ENTRY(ret_from_fork) - addk r5, r0, r3 - brlid r15, schedule_tail - nop - swi r31, r1, PT_R31 /* save r31 in user context. */ - /* will soon be restored to r31 in ret_to_user */ - addk r3, r0, r0 - brid ret_to_user - nop - -ENTRY(ret_from_kernel_thread) - brlid r15, schedule_tail - addk r5, r0, r3 - brald r15, r20 - addk r5, r0, r19 - brid ret_to_user - addk r3, r0, r0 - -work_pending: - lwi r11, r1, PT_MODE - bneid r11, 2f -3: - enable_irq - andi r11, r19, _TIF_NEED_RESCHED - beqi r11, 1f - bralid r15, schedule - nop - bri 4f -1: andi r11, r19, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME - beqi r11, no_work_pending - addk r5, r30, r0 - bralid r15, do_notify_resume - addik r6, r0, 1 - addk r30, r0, r0 /* no restarts from now on */ -4: - disable_irq - lwi r6, r31, TS_THREAD_INFO /* get thread info */ - lwi r19, r6, TI_FLAGS /* get flags in thread info */ - bri 3b - -ENTRY(ret_to_user) - disable_irq - - swi r4, r1, PT_R4 /* return val */ - swi r3, r1, PT_R3 /* return val */ - - lwi r6, r31, TS_THREAD_INFO /* get thread info */ - lwi r19, r6, TI_FLAGS /* get flags in thread info */ - bnei r19, work_pending /* do an extra work if any bits are set */ -no_work_pending: - disable_irq - -2: - /* save r31 */ - swi r31, r0, PER_CPU(CURRENT_SAVE) - /* save mode indicator */ - lwi r18, r1, PT_MODE - swi r18, r0, PER_CPU(KM) -//restore_context: - /* special purpose registers */ - lwi r18, r1, PT_FSR - mts rfsr, r18 - lwi r18, r1, PT_ESR - mts resr, r18 - lwi r18, r1, PT_EAR - mts rear, r18 - lwi r18, r1, PT_MSR - mts rmsr, r18 - - lwi r31, r1, PT_R31 - lwi r30, r1, PT_R30 - lwi r29, r1, PT_R29 - lwi r28, r1, PT_R28 - lwi r27, r1, PT_R27 - lwi r26, r1, PT_R26 - lwi r25, r1, PT_R25 - lwi r24, r1, PT_R24 - lwi r23, r1, PT_R23 - lwi r22, r1, PT_R22 - lwi r21, r1, PT_R21 - lwi r20, r1, PT_R20 - lwi r19, r1, PT_R19 - lwi r18, r1, PT_R18 - lwi r17, r1, PT_R17 - lwi r16, r1, PT_R16 - lwi r15, r1, PT_R15 - lwi r14, r1, PT_PC - lwi r13, r1, PT_R13 - lwi r12, r1, PT_R12 - lwi r11, r1, PT_R11 - lwi r10, r1, PT_R10 - lwi r9, r1, PT_R9 - lwi r8, r1, PT_R8 - lwi r7, r1, PT_R7 - lwi r6, r1, PT_R6 - lwi r5, r1, PT_R5 - lwi r4, r1, PT_R4 /* return val */ - lwi r3, r1, PT_R3 /* return val */ - lwi r2, r1, PT_R2 - lwi r1, r1, PT_R1 - - rtid r14, 0 - nop - -sys_rt_sigreturn_wrapper: - addk r30, r0, r0 /* no restarts for this one */ - brid sys_rt_sigreturn - addk r5, r1, r0 - - /* Interrupt vector table */ - .section .init.ivt, "ax" - .org 0x0 - brai _reset - brai _user_exception - brai _interrupt - brai _break - brai _hw_exception_handler - .org 0x60 - brai _debug_exception - -.section .rodata,"a" -#include "syscall_table.S" - -syscall_table_size=(.-sys_call_table) - -type_SYSCALL: - .ascii "SYSCALL\0" -type_IRQ: - .ascii "IRQ\0" -type_IRQ_PREEMPT: - .ascii "IRQ (PREEMPTED)\0" -type_SYSCALL_PREEMPT: - .ascii " SYSCALL (PREEMPTED)\0" - - /* - * Trap decoding for stack unwinder - * Tuples are (start addr, end addr, string) - * If return address lies on [start addr, end addr], - * unwinder displays 'string' - */ - - .align 4 -.global microblaze_trap_handlers -microblaze_trap_handlers: - /* Exact matches come first */ - .word ret_to_user ; .word ret_to_user ; .word type_SYSCALL - .word ret_from_intr; .word ret_from_intr ; .word type_IRQ - /* Fuzzy matches go here */ - .word ret_from_intr; .word no_intr_resched; .word type_IRQ_PREEMPT - .word work_pending ; .word no_work_pending; .word type_SYSCALL_PREEMPT - /* End of table */ - .word 0 ; .word 0 ; .word 0 diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index cf99c411503e..908788497b28 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -69,9 +69,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, int fsr, int addr) { -#ifdef CONFIG_MMU addr = regs->pc; -#endif #if 0 pr_warn("Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n", @@ -132,13 +130,10 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, fsr = FPE_FLTRES; _exception(SIGFPE, regs, fsr, addr); break; - -#ifdef CONFIG_MMU case MICROBLAZE_PRIVILEGED_EXCEPTION: pr_debug("Privileged exception\n"); _exception(SIGILL, regs, ILL_PRVOPC, addr); break; -#endif default: /* FIXME what to do in unexpected exception */ pr_warn("Unexpected exception %02x PC=%08x in %s mode\n", diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S index 14b276406153..ec2fcb545e64 100644 --- a/arch/microblaze/kernel/head.S +++ b/arch/microblaze/kernel/head.S @@ -34,7 +34,6 @@ #include #include /* for OF_DT_HEADER */ -#ifdef CONFIG_MMU #include /* COMMAND_LINE_SIZE */ #include #include @@ -48,8 +47,6 @@ empty_zero_page: swapper_pg_dir: .space PAGE_SIZE -#endif /* CONFIG_MMU */ - .section .rodata .align 4 endian_check: @@ -108,8 +105,6 @@ _copy_fdt: addik r3, r3, -4 /* descrement loop */ no_fdt_arg: -#ifdef CONFIG_MMU - #ifndef CONFIG_CMDLINE_BOOL /* * handling command line @@ -329,7 +324,6 @@ turn_on_mmu: nop start_here: -#endif /* CONFIG_MMU */ /* Initialize small data anchors */ addik r13, r0, _KERNEL_SDA_BASE_ @@ -345,11 +339,6 @@ start_here: brald r15, r11 nop -#ifndef CONFIG_MMU - addik r15, r0, machine_halt - braid start_kernel - nop -#else /* * Initialize the MMU. */ @@ -383,4 +372,3 @@ kernel_load_context: nop rted r17, 0 /* enable MMU and jump to start_kernel */ nop -#endif /* CONFIG_MMU */ diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S index 54411de22fa6..07ea23965f81 100644 --- a/arch/microblaze/kernel/hw_exception_handler.S +++ b/arch/microblaze/kernel/hw_exception_handler.S @@ -80,7 +80,6 @@ /* Helpful Macros */ #define NUM_TO_REG(num) r ## num -#ifdef CONFIG_MMU #define RESTORE_STATE \ lwi r5, r1, 0; \ mts rmsr, r5; \ @@ -92,7 +91,6 @@ lwi r11, r1, PT_R11; \ lwi r31, r1, PT_R31; \ lwi r1, r1, PT_R1; -#endif /* CONFIG_MMU */ #define LWREG_NOP \ bri ex_handler_unhandled; \ @@ -102,10 +100,6 @@ bri ex_handler_unhandled; \ nop; -/* FIXME this is weird - for noMMU kernel is not possible to use brid - * instruction which can shorten executed time - */ - /* r3 is the source */ #define R3_TO_LWREG_V(regnum) \ swi r3, r1, 4 * regnum; \ @@ -126,7 +120,6 @@ or r3, r0, NUM_TO_REG (regnum); \ bri ex_sw_tail; -#ifdef CONFIG_MMU #define R3_TO_LWREG_VM_V(regnum) \ brid ex_lw_end_vm; \ swi r3, r7, 4 * regnum; @@ -193,7 +186,6 @@ .endm #endif -#endif /* CONFIG_MMU */ .extern other_exception_handler /* Defined in exception.c */ @@ -251,7 +243,6 @@ */ /* wrappers to restore state before coming to entry.S */ -#ifdef CONFIG_MMU .section .data .align 4 pt_pool_space: @@ -316,31 +307,24 @@ _MB_HW_ExceptionVectorTable: .long TOPHYS(ex_handler_unhandled) .long TOPHYS(ex_handler_unhandled) .long TOPHYS(ex_handler_unhandled) -#endif .global _hw_exception_handler .section .text .align 4 .ent _hw_exception_handler _hw_exception_handler: -#ifndef CONFIG_MMU - addik r1, r1, -(EX_HANDLER_STACK_SIZ); /* Create stack frame */ -#else swi r1, r0, TOPHYS(pt_pool_space + PT_R1); /* GET_SP */ /* Save date to kernel memory. Here is the problem * when you came from user space */ ori r1, r0, TOPHYS(pt_pool_space); -#endif swi r3, r1, PT_R3 swi r4, r1, PT_R4 swi r5, r1, PT_R5 swi r6, r1, PT_R6 -#ifdef CONFIG_MMU swi r11, r1, PT_R11 swi r31, r1, PT_R31 lwi r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */ -#endif mfs r5, rmsr; nop @@ -350,18 +334,8 @@ _hw_exception_handler: mfs r3, rear; nop -#ifndef CONFIG_MMU - andi r5, r4, 0x1000; /* Check ESR[DS] */ - beqi r5, not_in_delay_slot; /* Branch if ESR[DS] not set */ - mfs r17, rbtr; /* ESR[DS] set - return address in BTR */ - nop -not_in_delay_slot: - swi r17, r1, PT_R17 -#endif - andi r5, r4, 0x1F; /* Extract ESR[EXC] */ -#ifdef CONFIG_MMU /* Calculate exception vector offset = r5 << 2 */ addk r6, r5, r5; /* << 1 */ addk r6, r6, r6; /* << 2 */ @@ -383,73 +357,6 @@ not_in_delay_slot: full_exception_trapw: RESTORE_STATE bri full_exception_trap -#else - /* Exceptions enabled here. This will allow nested exceptions */ - mfs r6, rmsr; - nop - swi r6, r1, 0; /* RMSR_OFFSET */ - ori r6, r6, 0x100; /* Turn ON the EE bit */ - andi r6, r6, ~2; /* Disable interrupts */ - mts rmsr, r6; - nop - - xori r6, r5, 1; /* 00001 = Unaligned Exception */ - /* Jump to unalignment exception handler */ - beqi r6, handle_unaligned_ex; - -handle_other_ex: /* Handle Other exceptions here */ - /* Save other volatiles before we make procedure calls below */ - swi r7, r1, PT_R7 - swi r8, r1, PT_R8 - swi r9, r1, PT_R9 - swi r10, r1, PT_R10 - swi r11, r1, PT_R11 - swi r12, r1, PT_R12 - swi r14, r1, PT_R14 - swi r15, r1, PT_R15 - swi r18, r1, PT_R18 - - or r5, r1, r0 - andi r6, r4, 0x1F; /* Load ESR[EC] */ - lwi r7, r0, PER_CPU(KM) /* MS: saving current kernel mode to regs */ - swi r7, r1, PT_MODE - mfs r7, rfsr - nop - addk r8, r17, r0; /* Load exception address */ - bralid r15, full_exception; /* Branch to the handler */ - nop; - mts rfsr, r0; /* Clear sticky fsr */ - nop - - /* - * Trigger execution of the signal handler by enabling - * interrupts and calling an invalid syscall. - */ - mfs r5, rmsr; - nop - ori r5, r5, 2; - mts rmsr, r5; /* enable interrupt */ - nop - addi r12, r0, __NR_syscalls; - brki r14, 0x08; - mfs r5, rmsr; /* disable interrupt */ - nop - andi r5, r5, ~2; - mts rmsr, r5; - nop - - lwi r7, r1, PT_R7 - lwi r8, r1, PT_R8 - lwi r9, r1, PT_R9 - lwi r10, r1, PT_R10 - lwi r11, r1, PT_R11 - lwi r12, r1, PT_R12 - lwi r14, r1, PT_R14 - lwi r15, r1, PT_R15 - lwi r18, r1, PT_R18 - - bri ex_handler_done; /* Complete exception handling */ -#endif /* 0x01 - Unaligned data access exception * This occurs when a word access is not aligned on a word boundary, @@ -463,7 +370,6 @@ handle_unaligned_ex: * R4 = ESR * R3 = EAR */ -#ifdef CONFIG_MMU andi r6, r4, 0x1000 /* Check ESR[DS] */ beqi r6, _no_delayslot /* Branch if ESR[DS] not set */ mfs r17, rbtr; /* ESR[DS] set - return address in BTR */ @@ -472,7 +378,7 @@ _no_delayslot: /* jump to high level unaligned handler */ RESTORE_STATE; bri unaligned_data_trap -#endif + andi r6, r4, 0x3E0; /* Mask and extract the register operand */ srl r6, r6; /* r6 >> 5 */ srl r6, r6; @@ -558,25 +464,10 @@ ex_shw: ex_sw_end: /* Exception handling of store word, ends. */ ex_handler_done: -#ifndef CONFIG_MMU - lwi r5, r1, 0 /* RMSR */ - mts rmsr, r5 - nop - lwi r3, r1, PT_R3 - lwi r4, r1, PT_R4 - lwi r5, r1, PT_R5 - lwi r6, r1, PT_R6 - lwi r17, r1, PT_R17 - - rted r17, 0 - addik r1, r1, (EX_HANDLER_STACK_SIZ); /* Restore stack frame */ -#else RESTORE_STATE; rted r17, 0 nop -#endif -#ifdef CONFIG_MMU /* Exception vector entry code. This code runs with address translation * turned off (i.e. using physical addresses). */ @@ -882,13 +773,7 @@ ex_handler_done: * bits 20 and 21 are zero. */ andi r3, r3, PAGE_MASK -#ifdef CONFIG_MICROBLAZE_64K_PAGES - ori r3, r3, TLB_VALID | TLB_PAGESZ(PAGESZ_64K) -#elif CONFIG_MICROBLAZE_16K_PAGES - ori r3, r3, TLB_VALID | TLB_PAGESZ(PAGESZ_16K) -#else ori r3, r3, TLB_VALID | TLB_PAGESZ(PAGESZ_4K) -#endif mts rtlbhi, r3 /* Load TLB HI */ nop @@ -926,10 +811,8 @@ ex_handler_done: rtsd r15,8 nop -#endif .end _hw_exception_handler -#ifdef CONFIG_MMU /* Unaligned data access exception last on a 4k page for MMU. * When this is called, we are in virtual mode with exceptions enabled * and registers 1-13,15,17,18 saved. @@ -1044,7 +927,6 @@ ex_unaligned_fixup: .word store6,ex_unaligned_fixup; .previous; .end _unaligned_data_exception -#endif /* CONFIG_MMU */ .global ex_handler_unhandled ex_handler_unhandled: @@ -1093,11 +975,7 @@ lw_r27: R3_TO_LWREG (27); lw_r28: R3_TO_LWREG (28); lw_r29: R3_TO_LWREG (29); lw_r30: R3_TO_LWREG (30); -#ifdef CONFIG_MMU lw_r31: R3_TO_LWREG_V (31); -#else -lw_r31: R3_TO_LWREG (31); -#endif sw_table: sw_r0: SWREG_TO_R3 (0); @@ -1131,13 +1009,8 @@ sw_r27: SWREG_TO_R3 (27); sw_r28: SWREG_TO_R3 (28); sw_r29: SWREG_TO_R3 (29); sw_r30: SWREG_TO_R3 (30); -#ifdef CONFIG_MMU sw_r31: SWREG_TO_R3_V (31); -#else -sw_r31: SWREG_TO_R3 (31); -#endif -#ifdef CONFIG_MMU lw_table_vm: lw_r0_vm: R3_TO_LWREG_VM (0); lw_r1_vm: R3_TO_LWREG_VM_V (1); @@ -1205,7 +1078,6 @@ sw_r28_vm: SWREG_TO_R3_VM_V (28); sw_r29_vm: SWREG_TO_R3_VM_V (29); sw_r30_vm: SWREG_TO_R3_VM_V (30); sw_r31_vm: SWREG_TO_R3_VM_V (31); -#endif /* CONFIG_MMU */ /* Temporary data structures used in the handler */ .section .data diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c index 51c43ee5e380..303aaf13573b 100644 --- a/arch/microblaze/kernel/microblaze_ksyms.c +++ b/arch/microblaze/kernel/microblaze_ksyms.c @@ -33,9 +33,7 @@ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); #endif -#ifdef CONFIG_MMU EXPORT_SYMBOL(empty_zero_page); -#endif EXPORT_SYMBOL(mbc); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index f99860771ff4..657c2beb665e 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -69,9 +69,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, ti->cpu_context.r19 = (unsigned long)arg; childregs->pt_mode = 1; local_save_flags(childregs->msr); -#ifdef CONFIG_MMU ti->cpu_context.msr = childregs->msr & ~MSR_IE; -#endif ti->cpu_context.r15 = (unsigned long)ret_from_kernel_thread - 8; return 0; } @@ -81,9 +79,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, memset(&ti->cpu_context, 0, sizeof(struct cpu_context)); ti->cpu_context.r1 = (unsigned long)childregs; -#ifndef CONFIG_MMU - ti->cpu_context.msr = (unsigned long)childregs->msr; -#else childregs->msr |= MSR_UMS; /* we should consider the fact that childregs is a copy of the parent @@ -105,7 +100,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, ti->cpu_context.msr = (childregs->msr|MSR_VM); ti->cpu_context.msr &= ~MSR_UMS; /* switch_to to kernel mode */ ti->cpu_context.msr &= ~MSR_IE; -#endif ti->cpu_context.r15 = (unsigned long)ret_from_fork - 8; /* @@ -130,13 +124,10 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) regs->pc = pc; regs->r1 = usp; regs->pt_mode = 0; -#ifdef CONFIG_MMU regs->msr |= MSR_UMS; regs->msr &= ~MSR_VM; -#endif } -#ifdef CONFIG_MMU #include /* * Set up a thread for executing a new program @@ -145,7 +136,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs) { return 0; /* MicroBlaze has no separate FPU registers */ } -#endif /* CONFIG_MMU */ void arch_cpu_idle(void) { diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 333b09658ca8..f417333eccae 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include @@ -190,12 +190,10 @@ static int microblaze_debugfs_init(void) } arch_initcall(microblaze_debugfs_init); -# ifdef CONFIG_MMU static int __init debugfs_tlb(void) { debugfs_create_u32("tlb_skip", S_IRUGO, of_debugfs_root, &tlb_skip); return 0; } device_initcall(debugfs_tlb); -# endif #endif diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index f11a0ccccabc..fc61eb0eb8dd 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -157,10 +157,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct rt_sigframe __user *frame; int err = 0, sig = ksig->sig; unsigned long address = 0; -#ifdef CONFIG_MMU pmd_t *pmdp; pte_t *ptep; -#endif frame = get_sigframe(ksig, regs, sizeof(*frame)); @@ -192,7 +190,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, regs->r15 = ((unsigned long)frame->tramp)-8; address = ((unsigned long)frame->tramp); -#ifdef CONFIG_MMU pmdp = pmd_off(current->mm, address); preempt_disable(); @@ -208,10 +205,6 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, } pte_unmap(ptep); preempt_enable(); -#else - flush_icache_range(address, address + 8); - flush_dcache_range(address, address + 8); -#endif if (err) return -EFAULT; @@ -313,7 +306,8 @@ static void do_signal(struct pt_regs *regs, int in_syscall) asmlinkage void do_notify_resume(struct pt_regs *regs, int in_syscall) { - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) diff --git a/arch/microblaze/kernel/unwind.c b/arch/microblaze/kernel/unwind.c index 778a761af0a7..a530a7a6be7d 100644 --- a/arch/microblaze/kernel/unwind.c +++ b/arch/microblaze/kernel/unwind.c @@ -161,22 +161,12 @@ static void microblaze_unwind_inner(struct task_struct *task, * unwind_trap - Unwind through a system trap, that stored previous state * on the stack. */ -#ifdef CONFIG_MMU static inline void unwind_trap(struct task_struct *task, unsigned long pc, unsigned long fp, struct stack_trace *trace, const char *loglvl) { /* To be implemented */ } -#else -static inline void unwind_trap(struct task_struct *task, unsigned long pc, - unsigned long fp, struct stack_trace *trace, - const char *loglvl) -{ - const struct pt_regs *regs = (const struct pt_regs *) fp; - microblaze_unwind_inner(task, regs->pc, regs->r1, regs->r15, trace, loglvl); -} -#endif /** * microblaze_unwind_inner - Unwind the stack from the specified point @@ -215,16 +205,7 @@ static void microblaze_unwind_inner(struct task_struct *task, * HW exception handler doesn't save all registers, * so we open-code a special case of unwind_trap() */ -#ifndef CONFIG_MMU - const struct pt_regs *regs = - (const struct pt_regs *) fp; -#endif printk("%sHW EXCEPTION\n", loglvl); -#ifndef CONFIG_MMU - microblaze_unwind_inner(task, regs->r17 - 4, - fp + EX_HANDLER_STACK_SIZ, - regs->r15, trace, loglvl); -#endif return; } diff --git a/arch/microblaze/mm/Makefile b/arch/microblaze/mm/Makefile index 8ced71100047..75edfc110d3e 100644 --- a/arch/microblaze/mm/Makefile +++ b/arch/microblaze/mm/Makefile @@ -3,6 +3,4 @@ # Makefile # -obj-y := consistent.o init.o - -obj-$(CONFIG_MMU) += pgtable.o mmu_context.o fault.o +obj-y := consistent.o init.o pgtable.o mmu_context.o fault.o diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index 81dffe43b18c..b7ad4a98636d 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -21,32 +21,3 @@ void arch_dma_prep_coherent(struct page *page, size_t size) flush_dcache_range(paddr, paddr + size); } - -#ifndef CONFIG_MMU -/* - * Consistent memory allocators. Used for DMA devices that want to share - * uncached memory with the processor core. My crufty no-MMU approach is - * simple. In the HW platform we can optionally mirror the DDR up above the - * processor cacheable region. So, memory accessed in this mirror region will - * not be cached. It's alloced from the same pool as normal memory, but the - * handle we return is shifted up into the uncached region. This will no doubt - * cause big problems if memory allocated here is not also freed properly. -- JW - * - * I have to use dcache values because I can't relate on ram size: - */ -#ifdef CONFIG_XILINX_UNCACHED_SHADOW -#define UNCACHED_SHADOW_MASK (cpuinfo.dcache_high - cpuinfo.dcache_base + 1) -#else -#define UNCACHED_SHADOW_MASK 0 -#endif /* CONFIG_XILINX_UNCACHED_SHADOW */ - -void *arch_dma_set_uncached(void *ptr, size_t size) -{ - unsigned long addr = (unsigned long)ptr; - - addr |= UNCACHED_SHADOW_MASK; - if (addr > cpuinfo.dcache_base && addr < cpuinfo.dcache_high) - pr_warn("ERROR: Your cache coherent area is CACHED!!!\n"); - return (void *)addr; -} -#endif /* CONFIG_MMU */ diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index a444778e59de..181e48782e6c 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -29,11 +29,6 @@ /* Use for MMU and noMMU because of PCI generic code */ int mem_init_done; -#ifndef CONFIG_MMU -unsigned int __page_offset; -EXPORT_SYMBOL(__page_offset); -#endif /* CONFIG_MMU */ - char *klimit = _end; /* @@ -77,13 +72,11 @@ static void highmem_setup(void) static void __init paging_init(void) { unsigned long zones_size[MAX_NR_ZONES]; -#ifdef CONFIG_MMU int idx; /* Setup fixmaps */ for (idx = 0; idx < __end_of_fixed_addresses; idx++) clear_fixmap(idx); -#endif /* Clean every zones */ memset(zones_size, 0, sizeof(zones_size)); @@ -103,40 +96,6 @@ static void __init paging_init(void) void __init setup_memory(void) { -#ifndef CONFIG_MMU - u32 kernel_align_start, kernel_align_size; - phys_addr_t start, end; - u64 i; - - /* Find main memory where is the kernel */ - for_each_mem_range(i, &start, &end) { - memory_start = start; - lowmem_size = end - start; - if ((memory_start <= (u32)_text) && - ((u32)_text <= (memory_start + lowmem_size - 1))) { - memory_size = lowmem_size; - PAGE_OFFSET = memory_start; - pr_info("%s: Main mem: 0x%x, size 0x%08x\n", - __func__, (u32) memory_start, - (u32) memory_size); - break; - } - } - - if (!memory_start || !memory_size) { - panic("%s: Missing memory setting 0x%08x, size=0x%08x\n", - __func__, (u32) memory_start, (u32) memory_size); - } - - /* reservation of region where is the kernel */ - kernel_align_start = PAGE_DOWN((u32)_text); - /* ALIGN can be remove because _end in vmlinux.lds.S is align */ - kernel_align_size = PAGE_UP((u32)klimit) - kernel_align_start; - pr_info("%s: kernel addr:0x%08x-0x%08x size=0x%08x\n", - __func__, kernel_align_start, kernel_align_start - + kernel_align_size, kernel_align_size); - memblock_reserve(kernel_align_start, kernel_align_size); -#endif /* * Kernel: * start: base phys address of kernel - page align @@ -176,12 +135,6 @@ void __init mem_init(void) mem_init_done = 1; } -#ifndef CONFIG_MMU -int page_is_ram(unsigned long pfn) -{ - return __range_ok(pfn, 0); -} -#else int page_is_ram(unsigned long pfn) { return pfn < max_low_pfn; @@ -325,8 +278,6 @@ void __init *early_get_page(void) NUMA_NO_NODE); } -#endif /* CONFIG_MMU */ - void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 60a58c0015f2..557585f1be41 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -325,12 +325,10 @@ int pci_mmap_legacy_page_range(struct pci_bus *bus, * memory, effectively behaving just like /dev/zero */ if ((offset + size) > hose->isa_mem_size) { -#ifdef CONFIG_MMU pr_debug("Process %s (pid:%d) mapped non-existing PCI", current->comm, current->pid); pr_debug("legacy memory for 0%04x:%02x\n", pci_domain_nr(bus), bus->number); -#endif if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); return 0; diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8d50bc1561f2..0a17bedf4f0d 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -9,6 +9,8 @@ config MIPS select ARCH_HAS_PTE_SPECIAL if !(32BIT && CPU_HAS_RIXI) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_KEEP_MEMBLOCK if DEBUG_KERNEL select ARCH_SUPPORTS_UPROBES select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if 64BIT @@ -248,6 +250,7 @@ config ATH79 config BMIPS_GENERIC bool "Broadcom Generic BMIPS kernel" + select ARCH_HAS_RESET_CONTROLLER select ARCH_HAS_SYNC_DMA_FOR_CPU_ALL select ARCH_HAS_PHYS_TO_DMA select BOOT_RAW @@ -486,6 +489,7 @@ config MACH_LOONGSON64 select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN select SYS_SUPPORTS_ZBOOT + select SYS_SUPPORTS_RELOCATABLE select ZONE_DMA32 select NUMA select SMP @@ -2484,6 +2488,7 @@ config MIPS_CPS select SYS_SUPPORTS_SCHED_SMT if CPU_MIPSR6 select SYS_SUPPORTS_SMP select WEAK_ORDERING + select GENERIC_IRQ_MIGRATION if HOTPLUG_CPU help Select this if you wish to run an SMP kernel across multiple cores within a MIPS Coherent Processing System. When this option is @@ -2644,7 +2649,7 @@ config WAR_R4600_V1_INDEX_ICACHEOP # 18. The CACHE instructions Hit_Writeback_Invalidate_D, Hit_Writeback_D, # Hit_Invalidate_D and Create_Dirty_Excl_D should only be # executed if there is no other dcache activity. If the dcache is -# accessed for another instruction immeidately preceding when these +# accessed for another instruction immediately preceding when these # cache instructions are executing, it is possible that the dcache # tag match outputs used by these cache instructions will be # incorrect. These cache instructions should be preceded by at least @@ -2777,7 +2782,8 @@ config RELOCATABLE depends on CPU_MIPS32_R2 || CPU_MIPS64_R2 || \ CPU_MIPS32_R5 || CPU_MIPS64_R5 || \ CPU_MIPS32_R6 || CPU_MIPS64_R6 || \ - CPU_P5600 || CAVIUM_OCTEON_SOC + CPU_P5600 || CAVIUM_OCTEON_SOC || \ + CPU_LOONGSON64 help This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. @@ -2788,6 +2794,7 @@ config RELOCATION_TABLE_SIZE hex "Relocation table size" depends on RELOCATABLE range 0x0 0x01000000 + default "0x00200000" if CPU_LOONGSON64 default "0x00100000" help A table of relocation data will be appended to the kernel binary @@ -3086,7 +3093,7 @@ config MIPS_O32_FP64_SUPPORT Although binutils currently supports use of this flag the details concerning its effect upon the O32 ABI in userland are still being - worked on. In order to avoid userland becoming dependant upon current + worked on. In order to avoid userland becoming dependent upon current behaviour before the details have been finalised, this option should be considered experimental and only enabled by those working upon said details. @@ -3124,7 +3131,7 @@ choice objcopy --update-section .appended_dtb=.dtb vmlinux - This is meant as a backward compatiblity convenience for those + This is meant as a backward compatibility convenience for those systems with a bootloader that can't be upgraded to accommodate the documented boot protocol using a device tree. diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 0d0f29d662c9..cd4343edeb11 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -347,6 +347,7 @@ bootz-y += vmlinuz.srec ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0) bootz-y += uzImage.bin endif +bootz-y += vmlinuz.itb # # Some machines like the Indy need 32-bit ELF binaries for booting purposes. @@ -378,7 +379,7 @@ ifdef CONFIG_SYS_SUPPORTS_ZBOOT # boot/compressed $(bootz-y): $(vmlinux-32) FORCE $(Q)$(MAKE) $(build)=arch/mips/boot/compressed \ - $(bootvars-y) 32bit-bfd=$(32bit-bfd) $@ + $(bootvars-y) 32bit-bfd=$(32bit-bfd) arch/mips/boot/$@ else vmlinuz: FORCE @echo ' CONFIG_SYS_SUPPORTS_ZBOOT is not enabled' diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c index 8b006addd6ba..ae0e01b9438f 100644 --- a/arch/mips/ar7/gpio.c +++ b/arch/mips/ar7/gpio.c @@ -319,6 +319,7 @@ int __init ar7_gpio_init(void) if (ret) { printk(KERN_ERR "%s: failed to add gpiochip\n", gpch->chip.label); + iounmap(gpch->regs); return ret; } printk(KERN_INFO "%s: registered %d GPIOs\n", diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index 6889f74e06f5..13d315ee676b 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -9,6 +9,7 @@ config BCM47XX_SSB select SSB_DRIVER_MIPS select SSB_DRIVER_EXTIF select SSB_EMBEDDED + select SSB_PCIHOST if PCI select SSB_B43_PCI_BRIDGE if PCI select SSB_DRIVER_PCICORE if PCI select SSB_PCICORE_HOSTMODE if PCI @@ -27,6 +28,7 @@ config BCM47XX_BCMA select BCMA select BCMA_HOST_SOC select BCMA_DRIVER_MIPS + select BCMA_DRIVER_PCI if PCI select BCMA_DRIVER_PCI_HOSTMODE if PCI select BCMA_DRIVER_GPIO default y diff --git a/arch/mips/boot/.gitignore b/arch/mips/boot/.gitignore index 2adc8581a175..1c7adddf2e60 100644 --- a/arch/mips/boot/.gitignore +++ b/arch/mips/boot/.gitignore @@ -2,6 +2,7 @@ mkboot elf2ecoff vmlinux.* +vmlinuz.* zImage zImage.tmp calc_vmlinuz_load_addr diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index d66511825fe1..47cd9dc7454a 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -36,6 +36,7 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n +GCOV_PROFILE := n # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o @@ -65,7 +66,9 @@ $(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE targets := $(notdir $(vmlinuzobjs-y)) targets += vmlinux.bin + OBJCOPYFLAGS_vmlinux.bin := $(OBJCOPYFLAGS) -O binary -R .comment -S + $(obj)/vmlinux.bin: $(KBUILD_IMAGE) FORCE $(call if_changed,objcopy) @@ -78,12 +81,15 @@ tool_$(CONFIG_KERNEL_XZ) = xzkern tool_$(CONFIG_KERNEL_ZSTD) = zstd22 targets += vmlinux.bin.z + $(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE $(call if_changed,$(tool_y)) targets += piggy.o dummy.o + OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \ --set-section-flags=.image=contents,alloc,load,readonly,data + $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE $(call if_changed,objcopy) @@ -102,14 +108,21 @@ UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS) vmlinuzobjs-y += $(obj)/piggy.o +targets += ../../../../vmlinuz + quiet_cmd_zld = LD $@ cmd_zld = $(LD) $(KBUILD_LDFLAGS) -Ttext $(VMLINUZ_LOAD_ADDRESS) -T $< $(vmlinuzobjs-y) -o $@ -quiet_cmd_strip = STRIP $@ +quiet_cmd_strip = STRIP $@ cmd_strip = $(STRIP) -s $@ -vmlinuz: $(src)/ld.script $(vmlinuzobjs-y) $(obj)/calc_vmlinuz_load_addr + +$(objtree)/vmlinuz: $(src)/ld.script $(vmlinuzobjs-y) $(obj)/calc_vmlinuz_load_addr $(call cmd,zld) $(call cmd,strip) +objboot := $(objtree)/arch/mips/boot + +$(objboot)/vmlinuz: $(objtree)/vmlinuz FORCE + # # Some DECstations need all possible sections of an ECOFF executable # @@ -121,34 +134,90 @@ endif hostprogs += ../elf2ecoff ifdef CONFIG_32BIT - VMLINUZ = vmlinuz + VMLINUZ = $(objtree)/vmlinuz else - VMLINUZ = vmlinuz.32 + VMLINUZ = $(objboot)/vmlinuz.32 endif +targets += ../vmlinuz.32 + quiet_cmd_32 = OBJCOPY $@ cmd_32 = $(OBJCOPY) -O $(32bit-bfd) $(OBJCOPYFLAGS) $< $@ -vmlinuz.32: vmlinuz + +$(objboot)/vmlinuz.32: $(objtree)/vmlinuz $(call cmd,32) +targets += ../vmlinuz.ecoff + quiet_cmd_ecoff = ECOFF $@ cmd_ecoff = $< $(VMLINUZ) $@ $(e2eflag) -vmlinuz.ecoff: $(obj)/../elf2ecoff $(VMLINUZ) + +$(objboot)/vmlinuz.ecoff: $(objboot)/elf2ecoff $(VMLINUZ) $(call cmd,ecoff) +targets += ../vmlinuz.bin + OBJCOPYFLAGS_vmlinuz.bin := $(OBJCOPYFLAGS) -O binary -vmlinuz.bin: vmlinuz + +$(objboot)/vmlinuz.bin: $(objtree)/vmlinuz $(call cmd,objcopy) +targets += ../vmlinuz.srec + OBJCOPYFLAGS_vmlinuz.srec := $(OBJCOPYFLAGS) -S -O srec -vmlinuz.srec: vmlinuz + +$(objboot)/vmlinuz.srec: $(objtree)/vmlinuz $(call cmd,objcopy) -uzImage.bin: vmlinuz.bin FORCE +targets += ../uzImage.bin + +$(objboot)/uzImage.bin: $(objboot)/vmlinuz.bin FORCE $(call if_changed,uimage,none) -clean-files += $(objtree)/vmlinuz -clean-files += $(objtree)/vmlinuz.32 -clean-files += $(objtree)/vmlinuz.ecoff -clean-files += $(objtree)/vmlinuz.bin -clean-files += $(objtree)/vmlinuz.srec +# +# Flattened Image Tree (.itb) image +# + +ifeq ($(ADDR_BITS),32) +itb_addr_cells = 1 +endif +ifeq ($(ADDR_BITS),64) +itb_addr_cells = 2 +endif + +targets += ../vmlinuz.its.S + +quiet_cmd_its_cat = CAT $@ + cmd_its_cat = cat $(real-prereqs) >$@ + +$(objboot)/vmlinuz.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS)) FORCE + $(call if_changed,its_cat) + +targets += ../vmlinuz.its + +quiet_cmd_cpp_its_S = ITS $@ + cmd_cpp_its_S = $(CPP) -P -C -o $@ $< \ + -DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \ + -DVMLINUX_BINARY="\"$(2)\"" \ + -DVMLINUX_COMPRESSION="\"none\"" \ + -DVMLINUX_LOAD_ADDRESS=$(VMLINUZ_LOAD_ADDRESS) \ + -DVMLINUX_ENTRY_ADDRESS=$(VMLINUZ_LOAD_ADDRESS) \ + -DADDR_BITS=$(ADDR_BITS) \ + -DADDR_CELLS=$(itb_addr_cells) + +$(objboot)/vmlinuz.its: $(objboot)/vmlinuz.its.S FORCE + $(call if_changed,cpp_its_S,vmlinuz.bin) + +targets += ../vmlinuz.itb + +quiet_cmd_itb-image = ITB $@ + cmd_itb-image = \ + env PATH="$(objtree)/scripts/dtc:$(PATH)" \ + $(BASH) $(MKIMAGE) \ + -D "-I dts -O dtb -p 500 \ + --include $(objtree)/arch/mips \ + --warning no-unit_address_vs_reg" \ + -f $(2) $@ + +$(objboot)/vmlinuz.itb: $(objboot)/vmlinuz.its $(objboot)/vmlinuz.bin FORCE + $(call if_changed,itb-image,$<) diff --git a/arch/mips/boot/compressed/ld.script b/arch/mips/boot/compressed/ld.script index 2ed08fbef8e7..0ebb667274d6 100644 --- a/arch/mips/boot/compressed/ld.script +++ b/arch/mips/boot/compressed/ld.script @@ -31,9 +31,12 @@ SECTIONS CONSTRUCTORS . = ALIGN(16); } - __appended_dtb = .; - /* leave space for appended DTB */ - . += 0x100000; + + .appended_dtb : { + __appended_dtb = .; + /* leave space for appended DTB */ + . += 0x100000; + } _edata = .; /* End of data section */ diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index 19027129add8..0259238d7a2e 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -6,7 +6,7 @@ subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img subdir-$(CONFIG_MACH_INGENIC) += ingenic subdir-$(CONFIG_LANTIQ) += lantiq subdir-$(CONFIG_MACH_LOONGSON64) += loongson -subdir-$(CONFIG_MSCC_OCELOT) += mscc +subdir-$(CONFIG_SOC_VCOREIII) += mscc subdir-$(CONFIG_MIPS_MALTA) += mti subdir-$(CONFIG_LEGACY_BOARD_SEAD3) += mti subdir-$(CONFIG_NLM_XLP_BOARD) += netlogic diff --git a/arch/mips/boot/dts/brcm/bcm63268.dtsi b/arch/mips/boot/dts/brcm/bcm63268.dtsi index 5acb49b61867..e0021ff9f144 100644 --- a/arch/mips/boot/dts/brcm/bcm63268.dtsi +++ b/arch/mips/boot/dts/brcm/bcm63268.dtsi @@ -70,6 +70,12 @@ reboot: syscon-reboot@10000008 { mask = <0x1>; }; + periph_rst: reset-controller@10000010 { + compatible = "brcm,bcm6345-reset"; + reg = <0x10000010 0x4>; + #reset-cells = <1>; + }; + periph_intc: interrupt-controller@10000020 { compatible = "brcm,bcm6345-l1-intc"; reg = <0x10000020 0x20>, diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi index 1f9edd710392..9dc558763c46 100644 --- a/arch/mips/boot/dts/brcm/bcm6328.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi @@ -57,6 +57,12 @@ clkctl: clock-controller@10000004 { #clock-cells = <1>; }; + periph_rst: reset-controller@10000010 { + compatible = "brcm,bcm6345-reset"; + reg = <0x10000010 0x4>; + #reset-cells = <1>; + }; + periph_intc: interrupt-controller@10000020 { compatible = "brcm,bcm6345-l1-intc"; reg = <0x10000020 0x10>, diff --git a/arch/mips/boot/dts/brcm/bcm6358.dtsi b/arch/mips/boot/dts/brcm/bcm6358.dtsi index f21176cac038..9d93e7f5e6fc 100644 --- a/arch/mips/boot/dts/brcm/bcm6358.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6358.dtsi @@ -82,6 +82,12 @@ periph_intc: interrupt-controller@fffe000c { interrupts = <2>, <3>; }; + periph_rst: reset-controller@fffe0034 { + compatible = "brcm,bcm6345-reset"; + reg = <0xfffe0034 0x4>; + #reset-cells = <1>; + }; + leds0: led-controller@fffe00d0 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/mips/boot/dts/brcm/bcm6362.dtsi b/arch/mips/boot/dts/brcm/bcm6362.dtsi index c98f9111e3c8..eb10341b75ba 100644 --- a/arch/mips/boot/dts/brcm/bcm6362.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6362.dtsi @@ -70,6 +70,12 @@ reboot: syscon-reboot@10000008 { mask = <0x1>; }; + periph_rst: reset-controller@10000010 { + compatible = "brcm,bcm6345-reset"; + reg = <0x10000010 0x4>; + #reset-cells = <1>; + }; + periph_intc: interrupt-controller@10000020 { compatible = "brcm,bcm6345-l1-intc"; reg = <0x10000020 0x10>, diff --git a/arch/mips/boot/dts/brcm/bcm6368.dtsi b/arch/mips/boot/dts/brcm/bcm6368.dtsi index 449c167dd892..52c19f40b9cc 100644 --- a/arch/mips/boot/dts/brcm/bcm6368.dtsi +++ b/arch/mips/boot/dts/brcm/bcm6368.dtsi @@ -70,6 +70,12 @@ reboot: syscon-reboot@10000008 { mask = <0x1>; }; + periph_rst: reset-controller@10000010 { + compatible = "brcm,bcm6345-reset"; + reg = <0x10000010 0x4>; + #reset-cells = <1>; + }; + periph_intc: interrupt-controller@10000020 { compatible = "brcm,bcm6345-l1-intc"; reg = <0x10000020 0x10>, diff --git a/arch/mips/boot/dts/img/pistachio_marduk.dts b/arch/mips/boot/dts/img/pistachio_marduk.dts index bf69da96dc8b..a8708783f04b 100644 --- a/arch/mips/boot/dts/img/pistachio_marduk.dts +++ b/arch/mips/boot/dts/img/pistachio_marduk.dts @@ -46,9 +46,10 @@ internal_dac_supply: internal-dac-supply { regulator-max-microvolt = <1800000>; }; - leds { + led-controller { compatible = "pwm-leds"; - heartbeat { + + led-1 { label = "marduk:red:heartbeat"; pwms = <&pwm 3 300000>; max-brightness = <255>; diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 75f5bfbf2c37..8877c62609de 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -69,9 +69,11 @@ led3 { eth0_power: fixedregulator@0 { compatible = "regulator-fixed"; + regulator-name = "eth0_power"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; + gpio = <&gpb 25 GPIO_ACTIVE_LOW>; enable-active-high; }; @@ -83,16 +85,39 @@ ir: ir { wlan0_power: fixedregulator@1 { compatible = "regulator-fixed"; + regulator-name = "wlan0_power"; + gpio = <&gpb 19 GPIO_ACTIVE_LOW>; enable-active-high; }; + + otg_power: fixedregulator@2 { + compatible = "regulator-fixed"; + + regulator-name = "otg_power"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + + gpio = <&gpf 14 GPIO_ACTIVE_LOW>; + enable-active-high; + }; }; &ext { clock-frequency = <48000000>; }; +&cgu { + /* + * Use the 32.768 kHz oscillator as the parent of the RTC for a higher + * precision. + */ + assigned-clocks = <&cgu JZ4780_CLK_OTGPHY>, <&cgu JZ4780_CLK_RTC>; + assigned-clock-parents = <0>, <&cgu JZ4780_CLK_RTCLK>; + assigned-clock-rates = <48000000>; +}; + &mmc0 { status = "okay"; @@ -396,6 +421,16 @@ &bch { status = "okay"; }; +&otg_phy { + status = "okay"; + + vcc-supply = <&otg_power>; +}; + +&otg { + status = "okay"; +}; + &pinctrl { pins_uart0: uart0 { function = "uart0"; @@ -489,7 +524,11 @@ pins_mmc1: mmc1 { }; &tcu { - /* 3 MHz for the system timer and clocksource */ - assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>; - assigned-clock-rates = <3000000>, <3000000>; + /* + * 750 kHz for the system timer and 3 MHz for the clocksource, + * use channel #0 for the system timer, #1 for the clocksource. + */ + assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>, + <&tcu TCU_CLK_OST>; + assigned-clock-rates = <750000>, <3000000>, <3000000>; }; diff --git a/arch/mips/boot/dts/ingenic/cu1000-neo.dts b/arch/mips/boot/dts/ingenic/cu1000-neo.dts index 22a1066d637b..f98cf029efc3 100644 --- a/arch/mips/boot/dts/ingenic/cu1000-neo.dts +++ b/arch/mips/boot/dts/ingenic/cu1000-neo.dts @@ -3,7 +3,7 @@ #include "x1000.dtsi" #include -#include +#include #include / { @@ -31,6 +31,42 @@ led-0 { }; }; + ssi: spi-gpio { + compatible = "spi-gpio"; + #address-cells = <1>; + #size-cells = <0>; + num-chipselects = <1>; + + mosi-gpios = <&gpd 2 GPIO_ACTIVE_HIGH>; + miso-gpios = <&gpd 3 GPIO_ACTIVE_HIGH>; + sck-gpios = <&gpd 0 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpd 1 GPIO_ACTIVE_HIGH>; + + status = "okay"; + + spi-max-frequency = <50000000>; + + sc16is752: expander@0 { + compatible = "nxp,sc16is752"; + reg = <0>; /* CE0 */ + spi-max-frequency = <4000000>; + + clocks = <&exclk_sc16is752>; + + interrupt-parent = <&gpc>; + interrupts = <6 IRQ_TYPE_EDGE_FALLING>; + + gpio-controller; + #gpio-cells = <2>; + + exclk_sc16is752: sc16is752 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <48000000>; + }; + }; + }; + wlan_pwrseq: msc1-pwrseq { compatible = "mmc-pwrseq-simple"; @@ -43,13 +79,19 @@ &exclk { clock-frequency = <24000000>; }; -&tcu { - /* 1500 kHz for the system timer and clocksource */ - assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>; - assigned-clock-rates = <1500000>, <1500000>; +&cgu { + /* + * Use the 32.768 kHz oscillator as the parent of the RTC for a higher + * precision. + */ + assigned-clocks = <&cgu X1000_CLK_RTC>; + assigned-clock-parents = <&cgu X1000_CLK_RTCLK>; +}; - /* Use channel #0 for the system timer channel #2 for the clocksource */ - ingenic,pwm-channels-mask = <0xfa>; +&ost { + /* 1500 kHz for the system timer and clocksource */ + assigned-clocks = <&ost OST_CLK_PERCPU_TIMER>, <&ost OST_CLK_GLOBAL_TIMER>; + assigned-clock-rates = <1500000>, <1500000>; }; &uart2 { @@ -135,6 +177,14 @@ lan8720a: ethernet-phy@0 { }; }; +&otg_phy { + status = "okay"; +}; + +&otg { + status = "okay"; +}; + &pinctrl { pins_uart2: uart2 { function = "uart2"; diff --git a/arch/mips/boot/dts/ingenic/cu1830-neo.dts b/arch/mips/boot/dts/ingenic/cu1830-neo.dts index 640f96c00d63..cfcb40edb7d9 100644 --- a/arch/mips/boot/dts/ingenic/cu1830-neo.dts +++ b/arch/mips/boot/dts/ingenic/cu1830-neo.dts @@ -3,7 +3,7 @@ #include "x1830.dtsi" #include -#include +#include #include / { @@ -31,6 +31,42 @@ led-0 { }; }; + ssi0: spi-gpio { + compatible = "spi-gpio"; + #address-cells = <1>; + #size-cells = <0>; + num-chipselects = <1>; + + mosi-gpios = <&gpc 12 GPIO_ACTIVE_HIGH>; + miso-gpios = <&gpc 11 GPIO_ACTIVE_HIGH>; + sck-gpios = <&gpc 15 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpc 16 GPIO_ACTIVE_HIGH>; + + status = "okay"; + + spi-max-frequency = <50000000>; + + sc16is752: expander@0 { + compatible = "nxp,sc16is752"; + reg = <0>; /* CE0 */ + spi-max-frequency = <4000000>; + + clocks = <&exclk_sc16is752>; + + interrupt-parent = <&gpb>; + interrupts = <18 IRQ_TYPE_EDGE_FALLING>; + + gpio-controller; + #gpio-cells = <2>; + + exclk_sc16is752: sc16is752 { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <48000000>; + }; + }; + }; + wlan_pwrseq: msc1-pwrseq { compatible = "mmc-pwrseq-simple"; @@ -43,13 +79,19 @@ &exclk { clock-frequency = <24000000>; }; -&tcu { - /* 1500 kHz for the system timer and clocksource */ - assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>; - assigned-clock-rates = <1500000>, <1500000>; +&cgu { + /* + * Use the 32.768 kHz oscillator as the parent of the RTC for a higher + * precision. + */ + assigned-clocks = <&cgu X1830_CLK_RTC>; + assigned-clock-parents = <&cgu X1830_CLK_RTCLK>; +}; - /* Use channel #0 for the system timer channel #2 for the clocksource */ - ingenic,pwm-channels-mask = <0xfa>; +&ost { + /* 1500 kHz for the system timer and clocksource */ + assigned-clocks = <&ost OST_CLK_PERCPU_TIMER>, <&ost OST_CLK_GLOBAL_TIMER>; + assigned-clock-rates = <1500000>, <1500000>; }; &uart1 { @@ -73,6 +115,10 @@ ads7830: adc@48 { }; }; +&dtrng { + status = "okay"; +}; + &msc0 { status = "okay"; @@ -135,6 +181,14 @@ ip101gr: ethernet-phy@0 { }; }; +&otg_phy { + status = "okay"; +}; + +&otg { + status = "okay"; +}; + &pinctrl { pins_uart1: uart1 { function = "uart1"; diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index eee523678ce5..c1afdfdaa8a3 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -295,7 +295,7 @@ dmac: dma-controller@13020000 { clocks = <&cgu JZ4740_CLK_DMA>; }; - uhc: uhc@13030000 { + uhc: usb@13030000 { compatible = "ingenic,jz4740-ohci", "generic-ohci"; reg = <0x13030000 0x1000>; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 018721a9eea9..05c00b93088e 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -430,7 +430,7 @@ dmac1: dma-controller@13420100 { interrupts = <23>; }; - uhc: uhc@13430000 { + uhc: usb@13430000 { compatible = "generic-ohci"; reg = <0x13430000 0x1000>; diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index dfb5a7e1bb21..8d01feef7ff5 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -61,13 +61,34 @@ rtc: rtc { }; cgu: jz4780-cgu@10000000 { - compatible = "ingenic,jz4780-cgu"; + compatible = "ingenic,jz4780-cgu", "simple-mfd"; reg = <0x10000000 0x100>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x10000000 0x100>; + + #clock-cells = <1>; clocks = <&ext>, <&rtc>; clock-names = "ext", "rtc"; - #clock-cells = <1>; + otg_phy: usb-phy@3c { + compatible = "ingenic,jz4780-phy"; + reg = <0x3c 0x10>; + + clocks = <&cgu JZ4780_CLK_OTG1>; + + #phy-cells = <0>; + + status = "disabled"; + }; + + rng: rng@d8 { + compatible = "ingenic,jz4780-rng"; + reg = <0xd8 0x8>; + + status = "disabled"; + }; }; tcu: timer@10002000 { @@ -494,4 +515,24 @@ bch: bch@134d0000 { status = "disabled"; }; + + otg: usb@13500000 { + compatible = "ingenic,jz4780-otg", "snps,dwc2"; + reg = <0x13500000 0x40000>; + + interrupt-parent = <&intc>; + interrupts = <21>; + + clocks = <&cgu JZ4780_CLK_UHC>; + clock-names = "otg"; + + phys = <&otg_phy>; + phy-names = "usb2-phy"; + + g-rx-fifo-size = <768>; + g-np-tx-fifo-size = <256>; + g-tx-fifo-size = <256 256 256 256 256 256 256 512>; + + status = "disabled"; + }; }; diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index 1f1f896dd1f7..aac9dedaf334 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -52,13 +52,47 @@ rtclk: rtc { }; cgu: x1000-cgu@10000000 { - compatible = "ingenic,x1000-cgu"; + compatible = "ingenic,x1000-cgu", "simple-mfd"; reg = <0x10000000 0x100>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x10000000 0x100>; #clock-cells = <1>; clocks = <&exclk>, <&rtclk>; clock-names = "ext", "rtc"; + + otg_phy: usb-phy@3c { + compatible = "ingenic,x1000-phy"; + reg = <0x3c 0x10>; + + clocks = <&cgu X1000_CLK_OTGPHY>; + + #phy-cells = <0>; + + status = "disabled"; + }; + + rng: rng@d8 { + compatible = "ingenic,x1000-rng"; + reg = <0xd8 0x8>; + + status = "disabled"; + }; + }; + + ost: timer@12000000 { + compatible = "ingenic,x1000-ost"; + reg = <0x12000000 0x3c>; + + #clock-cells = <1>; + + clocks = <&cgu X1000_CLK_OST>; + clock-names = "ost"; + + interrupt-parent = <&cpuintc>; + interrupts = <3>; }; tcu: timer@10002000 { @@ -323,4 +357,24 @@ mdio: mdio { status = "disabled"; }; }; + + otg: usb@13500000 { + compatible = "ingenic,x1000-otg", "snps,dwc2"; + reg = <0x13500000 0x40000>; + + interrupt-parent = <&intc>; + interrupts = <21>; + + clocks = <&cgu X1000_CLK_OTG>; + clock-names = "otg"; + + phys = <&otg_phy>; + phy-names = "usb2-phy"; + + g-rx-fifo-size = <768>; + g-np-tx-fifo-size = <256>; + g-tx-fifo-size = <256 256 256 256 256 256 256 512>; + + status = "disabled"; + }; }; diff --git a/arch/mips/boot/dts/ingenic/x1830.dtsi b/arch/mips/boot/dts/ingenic/x1830.dtsi index b05dac3ae308..b21c93057356 100644 --- a/arch/mips/boot/dts/ingenic/x1830.dtsi +++ b/arch/mips/boot/dts/ingenic/x1830.dtsi @@ -52,13 +52,40 @@ rtclk: rtc { }; cgu: x1830-cgu@10000000 { - compatible = "ingenic,x1830-cgu"; + compatible = "ingenic,x1830-cgu", "simple-mfd"; reg = <0x10000000 0x100>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x10000000 0x100>; #clock-cells = <1>; clocks = <&exclk>, <&rtclk>; clock-names = "ext", "rtc"; + + otg_phy: usb-phy@3c { + compatible = "ingenic,x1830-phy"; + reg = <0x3c 0x10>; + + clocks = <&cgu X1830_CLK_OTGPHY>; + + #phy-cells = <0>; + + status = "disabled"; + }; + }; + + ost: timer@12000000 { + compatible = "ingenic,x1830-ost", "ingenic,x1000-ost"; + reg = <0x12000000 0x3c>; + + #clock-cells = <1>; + + clocks = <&cgu X1830_CLK_OST>; + clock-names = "ost"; + + interrupt-parent = <&cpuintc>; + interrupts = <4>; }; tcu: timer@10002000 { @@ -236,6 +263,15 @@ i2c2: i2c-controller@10052000 { status = "disabled"; }; + dtrng: trng@10072000 { + compatible = "ingenic,x1830-dtrng"; + reg = <0x10072000 0xc>; + + clocks = <&cgu X1830_CLK_DTRNG>; + + status = "disabled"; + }; + pdma: dma-controller@13420000 { compatible = "ingenic,x1830-dma"; reg = <0x13420000 0x400 @@ -311,4 +347,24 @@ mdio: mdio { status = "disabled"; }; }; + + otg: usb@13500000 { + compatible = "ingenic,x1830-otg", "snps,dwc2"; + reg = <0x13500000 0x40000>; + + interrupt-parent = <&intc>; + interrupts = <21>; + + clocks = <&cgu X1830_CLK_OTG>; + clock-names = "otg"; + + phys = <&otg_phy>; + phy-names = "usb2-phy"; + + g-rx-fifo-size = <768>; + g-np-tx-fifo-size = <256>; + g-tx-fifo-size = <256 256 256 256 256 256 256 512>; + + status = "disabled"; + }; }; diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile index eb71515871f6..eeb6b7aae83b 100644 --- a/arch/mips/boot/dts/mscc/Makefile +++ b/arch/mips/boot/dts/mscc/Makefile @@ -1,4 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only -dtb-$(CONFIG_MSCC_OCELOT) += ocelot_pcb123.dtb ocelot_pcb120.dtb +dtb-$(CONFIG_SOC_VCOREIII) += \ + jaguar2_pcb110.dtb \ + jaguar2_pcb111.dtb \ + jaguar2_pcb118.dtb \ + luton_pcb091.dtb \ + ocelot_pcb120.dtb \ + ocelot_pcb123.dtb \ + serval_pcb105.dtb \ + serval_pcb106.dtb + obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y)) diff --git a/arch/mips/boot/dts/mscc/jaguar2.dtsi b/arch/mips/boot/dts/mscc/jaguar2.dtsi new file mode 100644 index 000000000000..42b2b0a51ddc --- /dev/null +++ b/arch/mips/boot/dts/mscc/jaguar2.dtsi @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 Microsemi Corporation + */ + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "mscc,jr2"; + + aliases { + serial0 = &uart0; + serial1 = &uart2; + gpio0 = &gpio; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "mips,mips24KEc"; + device_type = "cpu"; + clocks = <&cpu_clk>; + reg = <0>; + }; + }; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + cpu_clk: cpu-clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <500000000>; + }; + + ahb_clk: ahb-clk { + compatible = "fixed-factor-clock"; + #clock-cells = <0>; + clocks = <&cpu_clk>; + clock-div = <2>; + clock-mult = <1>; + }; + + ahb: ahb { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + interrupt-parent = <&intc>; + + cpu_ctrl: syscon@70000000 { + compatible = "mscc,ocelot-cpu-syscon", "syscon"; + reg = <0x70000000 0x2c>; + }; + + intc: interrupt-controller@70000070 { + compatible = "mscc,jaguar2-icpu-intr"; + reg = <0x70000070 0x94>; + #interrupt-cells = <1>; + interrupt-controller; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + uart0: serial@70100000 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + compatible = "ns16550a"; + reg = <0x70100000 0x20>; + interrupts = <6>; + clocks = <&ahb_clk>; + reg-io-width = <4>; + reg-shift = <2>; + + status = "disabled"; + }; + + uart2: serial@70100800 { + pinctrl-0 = <&uart2_pins>; + pinctrl-names = "default"; + compatible = "ns16550a"; + reg = <0x70100800 0x20>; + interrupts = <7>; + clocks = <&ahb_clk>; + reg-io-width = <4>; + reg-shift = <2>; + + status = "disabled"; + }; + + gpio: pinctrl@71010038 { + compatible = "mscc,jaguar2-pinctrl"; + reg = <0x71010038 0x90>; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&gpio 0 0 64>; + + uart_pins: uart-pins { + pins = "GPIO_10", "GPIO_11"; + function = "uart"; + }; + + uart2_pins: uart2-pins { + pins = "GPIO_24", "GPIO_25"; + function = "uart2"; + }; + + cs1_pins: cs1-pins { + pins = "GPIO_16"; + function = "si"; + }; + + cs2_pins: cs2-pins { + pins = "GPIO_17"; + function = "si"; + }; + + cs3_pins: cs3-pins { + pins = "GPIO_18"; + function = "si"; + }; + + i2c_pins: i2c-pins { + pins = "GPIO_14", "GPIO_15"; + function = "twi"; + }; + + i2c2_pins: i2c2-pins { + pins = "GPIO_28", "GPIO_29"; + function = "twi2"; + }; + }; + + i2c0: i2c@70100400 { + compatible = "mscc,ocelot-i2c", "snps,designware-i2c"; + status = "disabled"; + pinctrl-0 = <&i2c_pins>; + pinctrl-names = "default"; + reg = <0x70100400 0x100>, <0x700001b8 0x8>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <8>; + clock-frequency = <100000>; + clocks = <&ahb_clk>; + }; + + i2c2: i2c@70100c00 { + compatible = "mscc,ocelot-i2c", "snps,designware-i2c"; + status = "disabled"; + pinctrl-0 = <&i2c2_pins>; + pinctrl-names = "default"; + reg = <0x70100c00 0x100>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <8>; + clock-frequency = <100000>; + clocks = <&ahb_clk>; + }; + }; +}; diff --git a/arch/mips/boot/dts/mscc/jaguar2_common.dtsi b/arch/mips/boot/dts/mscc/jaguar2_common.dtsi new file mode 100644 index 000000000000..679ff0d8eda8 --- /dev/null +++ b/arch/mips/boot/dts/mscc/jaguar2_common.dtsi @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 Microsemi Corporation + */ + +#include "jaguar2.dtsi" + +/ { + chosen { + stdout-path = "serial0:115200n8"; + }; +}; + +&uart0 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; + i2c-sda-hold-time-ns = <300>; +}; diff --git a/arch/mips/boot/dts/mscc/jaguar2_pcb110.dts b/arch/mips/boot/dts/mscc/jaguar2_pcb110.dts new file mode 100644 index 000000000000..d80cd6842b2a --- /dev/null +++ b/arch/mips/boot/dts/mscc/jaguar2_pcb110.dts @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 Microsemi Corporation + */ + +/dts-v1/; +#include "jaguar2_common.dtsi" +#include + +/ { + model = "Jaguar2 Cu8-Sfp16 PCB110 Reference Board"; + compatible = "mscc,jr2-pcb110", "mscc,jr2"; + + aliases { + i2c0 = &i2c0; + i2c108 = &i2c108; + i2c109 = &i2c109; + i2c110 = &i2c110; + i2c111 = &i2c111; + i2c112 = &i2c112; + i2c113 = &i2c113; + i2c114 = &i2c114; + i2c115 = &i2c115; + i2c116 = &i2c116; + i2c117 = &i2c117; + i2c118 = &i2c118; + i2c119 = &i2c119; + i2c120 = &i2c120; + i2c121 = &i2c121; + i2c122 = &i2c122; + i2c123 = &i2c123; + i2c124 = &i2c124; + i2c125 = &i2c125; + i2c126 = &i2c126; + i2c127 = &i2c127; + i2c128 = &i2c128; + i2c129 = &i2c129; + i2c130 = &i2c130; + i2c131 = &i2c131; + i2c149 = &i2c149; + i2c150 = &i2c150; + i2c151 = &i2c151; + i2c152 = &i2c152; + }; + i2c0_imux: i2c0-imux { + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + i2c-parent = <&i2c0>; + pinctrl-names = + "i2c149", "i2c150", "i2c151", "i2c152", "idle"; + pinctrl-0 = <&i2cmux_0>; + pinctrl-1 = <&i2cmux_1>; + pinctrl-2 = <&i2cmux_2>; + pinctrl-3 = <&i2cmux_3>; + pinctrl-4 = <&i2cmux_pins_i>; + i2c149: i2c@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c150: i2c@1 { + reg = <0x1>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c151: i2c@2 { + reg = <0x2>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c152: i2c@3 { + reg = <0x3>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; + i2c0_emux: i2c0-emux { + compatible = "i2c-mux-gpio"; + #address-cells = <1>; + #size-cells = <0>; + i2c-parent = <&i2c0>; + mux-gpios = <&gpio 51 GPIO_ACTIVE_HIGH + &gpio 52 GPIO_ACTIVE_HIGH + &gpio 53 GPIO_ACTIVE_HIGH + &gpio 58 GPIO_ACTIVE_HIGH + &gpio 59 GPIO_ACTIVE_HIGH>; + idle-state = <0x0>; + i2c108: i2c@10 { + reg = <0x10>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c109: i2c@11 { + reg = <0x11>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c110: i2c@12 { + reg = <0x12>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c111: i2c@13 { + reg = <0x13>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c112: i2c@14 { + reg = <0x14>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c113: i2c@15 { + reg = <0x15>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c114: i2c@16 { + reg = <0x16>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c115: i2c@17 { + reg = <0x17>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c116: i2c@8 { + reg = <0x8>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c117: i2c@9 { + reg = <0x9>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c118: i2c@a { + reg = <0xa>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c119: i2c@b { + reg = <0xb>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c120: i2c@c { + reg = <0xc>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c121: i2c@d { + reg = <0xd>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c122: i2c@e { + reg = <0xe>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c123: i2c@f { + reg = <0xf>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; +}; + +&gpio { + synce_pins: synce-pins { + // GPIO 16 == SI_nCS1 + pins = "GPIO_16"; + function = "si"; + }; + synce_builtin_pins: synce-builtin-pins { + // GPIO 49 == SI_nCS13 + pins = "GPIO_49"; + function = "si"; + }; + i2cmux_pins_i: i2cmux-pins-i { + pins = "GPIO_17", "GPIO_18", "GPIO_20", "GPIO_21"; + function = "twi_scl_m"; + output-low; + }; + i2cmux_0: i2cmux-0 { + pins = "GPIO_17"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_1: i2cmux-1 { + pins = "GPIO_18"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_2: i2cmux-2 { + pins = "GPIO_20"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_3: i2cmux-3 { + pins = "GPIO_21"; + function = "twi_scl_m"; + output-high; + }; +}; + +&i2c0 { + pca9545@70 { + compatible = "nxp,pca9545"; + reg = <0x70>; + #address-cells = <1>; + #size-cells = <0>; + i2c-mux-idle-disconnect; + i2c124: i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + }; + i2c125: i2c@1 { + /* FMC B */ + #address-cells = <1>; + #size-cells = <0>; + reg = <1>; + }; + i2c126: i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <2>; + }; + i2c127: i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <3>; + }; + }; + pca9545@71 { + compatible = "nxp,pca9545"; + reg = <0x71>; + #address-cells = <1>; + #size-cells = <0>; + i2c-mux-idle-disconnect; + i2c128: i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + }; + i2c129: i2c@1 { + /* FMC B */ + #address-cells = <1>; + #size-cells = <0>; + reg = <1>; + }; + i2c130: i2c@2 { + #address-cells = <1>; + #size-cells = <0>; + reg = <2>; + }; + i2c131: i2c@3 { + #address-cells = <1>; + #size-cells = <0>; + reg = <3>; + }; + }; +}; diff --git a/arch/mips/boot/dts/mscc/jaguar2_pcb111.dts b/arch/mips/boot/dts/mscc/jaguar2_pcb111.dts new file mode 100644 index 000000000000..813c5e16013c --- /dev/null +++ b/arch/mips/boot/dts/mscc/jaguar2_pcb111.dts @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2018 Microsemi Corporation + */ + +/dts-v1/; +#include "jaguar2_common.dtsi" + +/ { + model = "Jaguar2 Cu48 PCB111 Reference Board"; + compatible = "mscc,jr2-pcb111", "mscc,jr2"; + + aliases { + i2c0 = &i2c0; + i2c149 = &i2c149; + i2c150 = &i2c150; + i2c151 = &i2c151; + i2c152 = &i2c152; + i2c203 = &i2c203; + }; + + i2c0_imux: i2c0-imux { + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + i2c-parent = <&i2c0>; + pinctrl-names = + "i2c149", "i2c150", "i2c151", "i2c152", "i2c203", "idle"; + pinctrl-0 = <&i2cmux_0>; + pinctrl-1 = <&i2cmux_1>; + pinctrl-2 = <&i2cmux_2>; + pinctrl-3 = <&i2cmux_3>; + pinctrl-4 = <&i2cmux_pins_i>; // Added by convention for PoE + pinctrl-5 = <&i2cmux_pins_i>; + i2c149: i2c@0 { + reg = <0x0>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c150: i2c@1 { + reg = <0x1>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c151: i2c@2 { + reg = <0x2>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c152: i2c@3 { + reg = <0x3>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c203: i2c@4 { + reg = <0x4>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; +}; + +&gpio { + synce_builtin_pins: synce-builtin-pins { + // GPIO 49 == SI_nCS13 + pins = "GPIO_49"; + function = "si"; + }; + cpld_pins: cpld-pins { + // GPIO 50 == SI_nCS14 + pins = "GPIO_50"; + function = "si"; + }; + cpld_fifo_pins: synce-builtin-pins { + // GPIO 51 == SI_nCS15 + pins = "GPIO_51"; + function = "si"; + }; +}; + +&gpio { + i2cmux_pins_i: i2cmux-pins-i { + pins = "GPIO_17", "GPIO_18"; + function = "twi_scl_m"; + output-low; + }; + i2cmux_0: i2cmux-0 { + pins = "GPIO_17"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_1: i2cmux-1 { + pins = "GPIO_18"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_2: i2cmux-2 { + pins = "GPIO_20"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_3: i2cmux-3 { + pins = "GPIO_21"; + function = "twi_scl_m"; + output-high; + }; +}; diff --git a/arch/mips/boot/dts/mscc/jaguar2_pcb118.dts b/arch/mips/boot/dts/mscc/jaguar2_pcb118.dts new file mode 100644 index 000000000000..27c644f2d17f --- /dev/null +++ b/arch/mips/boot/dts/mscc/jaguar2_pcb118.dts @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2018 Microsemi Corporation + */ + +/dts-v1/; +#include "jaguar2_common.dtsi" + +/ { + model = "Jaguar2/Aquantia PCB118 Reference Board"; + compatible = "mscc,jr2-pcb118", "mscc,jr2"; + + aliases { + i2c150 = &i2c150; + i2c151 = &i2c151; + }; + + i2c0_imux: i2c0-imux { + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + i2c-parent = <&i2c0>; + pinctrl-names = + "i2c150", "i2c151", "idle"; + pinctrl-0 = <&i2cmux_0>; + pinctrl-1 = <&i2cmux_1>; + pinctrl-2 = <&i2cmux_pins_i>; + i2c150: i2c@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c151: i2c@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; +}; + +&gpio { + i2cmux_pins_i: i2cmux-pins-i { + pins = "GPIO_17", "GPIO_16"; + function = "twi_scl_m"; + output-low; + }; + i2cmux_0: i2cmux-0 { + pins = "GPIO_17"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_1: i2cmux-1 { + pins = "GPIO_16"; + function = "twi_scl_m"; + output-high; + }; +}; diff --git a/arch/mips/boot/dts/mscc/luton.dtsi b/arch/mips/boot/dts/mscc/luton.dtsi new file mode 100644 index 000000000000..2a170b84c5a9 --- /dev/null +++ b/arch/mips/boot/dts/mscc/luton.dtsi @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020 Microsemi Corporation */ + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "mscc,luton"; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "mips,mips24KEc"; + device_type = "cpu"; + clocks = <&cpu_clk>; + reg = <0>; + }; + }; + + aliases { + serial0 = &uart0; + }; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + cpu_clk: cpu-clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <416666666>; + }; + + ahb_clk: ahb-clk { + compatible = "fixed-factor-clock"; + #clock-cells = <0>; + clocks = <&cpu_clk>; + clock-div = <2>; + clock-mult = <1>; + }; + + ahb@60000000 { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x60000000 0x20000000>; + + interrupt-parent = <&intc>; + + cpu_ctrl: syscon@10000000 { + compatible = "mscc,ocelot-cpu-syscon", "syscon"; + reg = <0x10000000 0x2c>; + }; + + intc: interrupt-controller@10000084 { + compatible = "mscc,luton-icpu-intr"; + reg = <0x10000084 0x70>; + #interrupt-cells = <1>; + interrupt-controller; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + uart0: serial@10100000 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + compatible = "ns16550a"; + reg = <0x10100000 0x20>; + interrupts = <6>; + clocks = <&ahb_clk>; + reg-io-width = <4>; + reg-shift = <2>; + + status = "disabled"; + }; + + i2c0: i2c@10100400 { + compatible = "mscc,ocelot-i2c", "snps,designware-i2c"; + pinctrl-0 = <&i2c_pins>; + pinctrl-names = "default"; + reg = <0x10100400 0x100>, <0x100002a4 0x8>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <11>; + clocks = <&ahb_clk>; + + status = "disabled"; + }; + + gpio: pinctrl@70068 { + compatible = "mscc,luton-pinctrl"; + reg = <0x70068 0x28>; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&gpio 0 0 32>; + interrupt-controller; + interrupts = <13>; + #interrupt-cells = <2>; + + i2c_pins: i2c-pins { + pins = "GPIO_5", "GPIO_6"; + function = "twi"; + }; + + uart_pins: uart-pins { + pins = "GPIO_30", "GPIO_31"; + function = "uart"; + }; + + }; + }; +}; diff --git a/arch/mips/boot/dts/mscc/luton_pcb091.dts b/arch/mips/boot/dts/mscc/luton_pcb091.dts new file mode 100644 index 000000000000..26ef6285d71d --- /dev/null +++ b/arch/mips/boot/dts/mscc/luton_pcb091.dts @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 Microsemi Corporation + */ + +/dts-v1/; + +#include "luton.dtsi" + +/ { + model = "Luton10 PCB091 Reference Board"; + compatible = "mscc,luton-pcb091", "mscc,luton"; + + aliases { + serial0 = &uart0; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; +}; + +&uart0 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; + i2c-sda-hold-time-ns = <300>; +}; diff --git a/arch/mips/boot/dts/mscc/serval.dtsi b/arch/mips/boot/dts/mscc/serval.dtsi new file mode 100644 index 000000000000..089ce89df190 --- /dev/null +++ b/arch/mips/boot/dts/mscc/serval.dtsi @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2018 Microsemi Corporation + */ + +/ { + #address-cells = <1>; + #size-cells = <1>; + compatible = "mscc,serval"; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "mips,mips24KEc"; + device_type = "cpu"; + clocks = <&cpu_clk>; + reg = <0>; + }; + }; + + aliases { + serial0 = &uart0; + gpio0 = &gpio; + }; + + cpuintc: interrupt-controller { + #address-cells = <0>; + #interrupt-cells = <1>; + interrupt-controller; + compatible = "mti,cpu-interrupt-controller"; + }; + + cpu_clk: cpu-clock { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <416666666>; + }; + + ahb_clk: ahb-clk { + compatible = "fixed-factor-clock"; + #clock-cells = <0>; + clocks = <&cpu_clk>; + clock-div = <2>; + clock-mult = <1>; + }; + + ahb: ahb { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges; + + interrupt-parent = <&intc>; + + cpu_ctrl: syscon@70000000 { + compatible = "mscc,ocelot-cpu-syscon", "syscon"; + reg = <0x70000000 0x2c>; + }; + + intc: interrupt-controller@70000070 { + compatible = "mscc,serval-icpu-intr"; + reg = <0x70000070 0x70>; + #interrupt-cells = <1>; + interrupt-controller; + interrupt-parent = <&cpuintc>; + interrupts = <2>; + }; + + uart0: serial@70100000 { + pinctrl-0 = <&uart_pins>; + pinctrl-names = "default"; + compatible = "ns16550a"; + reg = <0x70100000 0x20>; + interrupts = <6>; + clocks = <&ahb_clk>; + reg-io-width = <4>; + reg-shift = <2>; + + status = "disabled"; + }; + + uart2: serial@70100800 { + pinctrl-0 = <&uart2_pins>; + pinctrl-names = "default"; + compatible = "ns16550a"; + reg = <0x70100800 0x20>; + interrupts = <7>; + clocks = <&ahb_clk>; + reg-io-width = <4>; + reg-shift = <2>; + + status = "disabled"; + }; + + gpio: pinctrl@71070034 { + compatible = "mscc,serval-pinctrl"; + reg = <0x71070034 0x28>; + gpio-controller; + #gpio-cells = <2>; + gpio-ranges = <&gpio 0 0 22>; + + sgpio_pins: sgpio-pins { + pins = "GPIO_0", "GPIO_2", "GPIO_3", "GPIO_1"; + function = "sg0"; + }; + + i2c_pins: i2c-pins { + pins = "GPIO_6", "GPIO_7"; + function = "twi"; + }; + + uart_pins: uart-pins { + pins = "GPIO_26", "GPIO_27"; + function = "uart"; + }; + + uart2_pins: uart2-pins { + pins = "GPIO_13", "GPIO_14"; + function = "uart2"; + }; + + cs1_pins: cs1-pins { + pins = "GPIO_8"; + function = "si"; + }; + + irqext0_pins: irqext0-pins { + pins = "GPIO_28"; + function = "irq0"; + }; + + irqext1_pins: irqext1-pins { + pins = "GPIO_29"; + function = "irq1"; + }; + }; + + i2c0: i2c@70100400 { + compatible = "mscc,ocelot-i2c", "snps,designware-i2c"; + status = "disabled"; + pinctrl-0 = <&i2c_pins>; + pinctrl-names = "default"; + reg = <0x70100400 0x100>, <0x70000190 0x8>; + #address-cells = <1>; + #size-cells = <0>; + interrupts = <8>; + clock-frequency = <100000>; + clocks = <&ahb_clk>; + }; + }; +}; diff --git a/arch/mips/boot/dts/mscc/serval_common.dtsi b/arch/mips/boot/dts/mscc/serval_common.dtsi new file mode 100644 index 000000000000..5b404836db5e --- /dev/null +++ b/arch/mips/boot/dts/mscc/serval_common.dtsi @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2020 Microsemi Corporation + */ + +#include "serval.dtsi" + +/ { + aliases { + serial0 = &uart0; + i2c104 = &i2c104; + i2c105 = &i2c105; + i2c106 = &i2c106; + i2c107 = &i2c107; + i2c108 = &i2c108; + i2c109 = &i2c109; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + i2c0_imux: i2c0-imux{ + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + i2c-parent = <&i2c0>; + pinctrl-names = + "i2c104", "i2c105", "i2c106", "i2c107", + "i2c108", "i2c109", "idle"; + pinctrl-0 = <&i2cmux_0>; + pinctrl-1 = <&i2cmux_1>; + pinctrl-2 = <&i2cmux_2>; + pinctrl-3 = <&i2cmux_3>; + pinctrl-4 = <&i2cmux_4>; + pinctrl-5 = <&i2cmux_5>; + pinctrl-6 = <&i2cmux_pins_i>; + i2c104: i2c_sfp0@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c105: i2c_sfp1@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c106: i2c_sfp2@2 { + reg = <2>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c107: i2c_sfp3@3 { + reg = <3>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c108: i2c_sfp4@4 { + reg = <4>; + #address-cells = <1>; + #size-cells = <0>; + }; + i2c109: i2c_sfp5@5 { + reg = <5>; + #address-cells = <1>; + #size-cells = <0>; + }; +}; + +}; + +&uart0 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + +&gpio { + i2c_pins: i2c-pins { + pins = "GPIO_7"; /* No "default" scl for i2c0 */ + function = "twi"; + }; + i2cmux_pins_i: i2cmux-pins-i { + pins = "GPIO_11", "GPIO_12", "GPIO_18", "GPIO_19", + "GPIO_20", "GPIO_21"; + function = "twi_scl_m"; + output-low; + }; + i2cmux_0: i2cmux-0 { + pins = "GPIO_11"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_1: i2cmux-1 { + pins = "GPIO_12"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_2: i2cmux-2 { + pins = "GPIO_18"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_3: i2cmux-3 { + pins = "GPIO_19"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_4: i2cmux-4 { + pins = "GPIO_20"; + function = "twi_scl_m"; + output-high; + }; + i2cmux_5: i2cmux-5 { + pins = "GPIO_21"; + function = "twi_scl_m"; + output-high; + }; +}; + +&i2c0 { + status = "okay"; + i2c-sda-hold-time-ns = <300>; +}; + diff --git a/arch/mips/boot/dts/mscc/serval_pcb105.dts b/arch/mips/boot/dts/mscc/serval_pcb105.dts new file mode 100644 index 000000000000..a1b0012b79d3 --- /dev/null +++ b/arch/mips/boot/dts/mscc/serval_pcb105.dts @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2018 Microsemi Corporation + */ + +/dts-v1/; +#include "serval_common.dtsi" + +/ { + model = "Serval PCB105 Reference Board"; + compatible = "mscc,serval-pcb105", "mscc,serval"; + + aliases { + }; + +}; + diff --git a/arch/mips/boot/dts/mscc/serval_pcb106.dts b/arch/mips/boot/dts/mscc/serval_pcb106.dts new file mode 100644 index 000000000000..237be7c8da57 --- /dev/null +++ b/arch/mips/boot/dts/mscc/serval_pcb106.dts @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: (GPL-2.0+ OR MIT) +/* + * Copyright (c) 2018 Microsemi Corporation + */ + +/dts-v1/; +#include "serval_common.dtsi" + +/ { + model = "Serval PCB106 Reference Board"; + compatible = "mscc,serval-pcb106", "mscc,serval"; + + aliases { + }; + +}; + diff --git a/arch/mips/boot/dts/mti/sead3.dts b/arch/mips/boot/dts/mti/sead3.dts index 192c26ff1d3d..1cf6728af8fe 100644 --- a/arch/mips/boot/dts/mti/sead3.dts +++ b/arch/mips/boot/dts/mti/sead3.dts @@ -56,7 +56,7 @@ gic: interrupt-controller@1b1c0000 { interrupt-parent = <&cpu_intc>; }; - ehci@1b200000 { + usb@1b200000 { compatible = "generic-ehci"; reg = <0x1b200000 0x1000>; diff --git a/arch/mips/boot/dts/ralink/mt7628a.dtsi b/arch/mips/boot/dts/ralink/mt7628a.dtsi index 892e8ab863c5..45bf96a3d17a 100644 --- a/arch/mips/boot/dts/ralink/mt7628a.dtsi +++ b/arch/mips/boot/dts/ralink/mt7628a.dtsi @@ -275,7 +275,7 @@ usb_phy: usb-phy@10120000 { reset-names = "host", "device"; }; - ehci@101c0000 { + usb@101c0000 { compatible = "generic-ehci"; reg = <0x101c0000 0x1000>; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 6501a842c41a..bd47e15d02c7 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -1505,10 +1505,20 @@ static int __init octeon_irq_init_ciu( goto err; } + r = irq_alloc_desc_at(OCTEON_IRQ_MBOX0, -1); + if (r < 0) { + pr_err("Failed to allocate desc for %s\n", "OCTEON_IRQ_MBOX0"); + goto err; + } r = octeon_irq_set_ciu_mapping( OCTEON_IRQ_MBOX0, 0, 32, 0, chip_mbox, handle_percpu_irq); if (r) goto err; + r = irq_alloc_desc_at(OCTEON_IRQ_MBOX1, -1); + if (r < 0) { + pr_err("Failed to allocate desc for %s\n", "OCTEON_IRQ_MBOX1"); + goto err; + } r = octeon_irq_set_ciu_mapping( OCTEON_IRQ_MBOX1, 0, 33, 0, chip_mbox, handle_percpu_irq); if (r) @@ -1546,6 +1556,11 @@ static int __init octeon_irq_init_ciu( if (r) goto err; + r = irq_alloc_descs(OCTEON_IRQ_WDOG0, OCTEON_IRQ_WDOG0, 16, -1); + if (r < 0) { + pr_err("Failed to allocate desc for %s\n", "OCTEON_IRQ_WDOGx"); + goto err; + } /* CIU_1 */ for (i = 0; i < 16; i++) { r = octeon_irq_set_ciu_mapping( diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 561389d3fadb..982826ba0ef7 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -973,8 +973,6 @@ void __init plat_mem_setup(void) uint64_t crashk_end; #ifndef CONFIG_CRASH_DUMP int64_t memory; - uint64_t kernel_start; - uint64_t kernel_size; #endif total = 0; @@ -1078,13 +1076,6 @@ void __init plat_mem_setup(void) } } cvmx_bootmem_unlock(); - /* Add the memory region for the kernel. */ - kernel_start = (unsigned long) _text; - kernel_size = _end - _text; - - /* Adjust for physical offset. */ - kernel_start &= ~0xffffffff80000000ULL; - memblock_add(kernel_start, kernel_size); #endif /* CONFIG_CRASH_DUMP */ #ifdef CONFIG_CAVIUM_RESERVE32 diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 076db9a06b5e..66ce5527da54 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -290,9 +290,6 @@ static int octeon_cpu_disable(void) { unsigned int cpu = smp_processor_id(); - if (cpu == 0) - return -EBUSY; - if (!octeon_bootloader_entry_addr) return -ENOTSUPP; diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig index 052c5ad0f2b1..ab7ebb066834 100644 --- a/arch/mips/configs/ci20_defconfig +++ b/arch/mips/configs/ci20_defconfig @@ -49,6 +49,8 @@ CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_JZ4780=y CONFIG_MTD_UBI=y CONFIG_MTD_UBI_FASTMAP=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_BROADCOM is not set @@ -77,7 +79,6 @@ CONFIG_SERIAL_8250_NR_UARTS=5 CONFIG_SERIAL_8250_RUNTIME_UARTS=5 CONFIG_SERIAL_8250_INGENIC=y CONFIG_SERIAL_OF_PLATFORM=y -# CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_JZ4780=y CONFIG_SPI=y @@ -99,7 +100,12 @@ CONFIG_IR_GPIO_TX=m CONFIG_MEDIA_SUPPORT=m # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_STORAGE=y +CONFIG_USB_DWC2=y +CONFIG_USB_SERIAL=y +CONFIG_USB_SERIAL_CH341=y +CONFIG_USB_GADGET=y CONFIG_MMC=y CONFIG_MMC_JZ4740=y CONFIG_NEW_LEDS=y @@ -131,8 +137,13 @@ CONFIG_MEMORY=y CONFIG_JZ4780_NEMC=y CONFIG_PWM=y CONFIG_PWM_JZ4740=m +CONFIG_JZ4780_EFUSE=y +CONFIG_JZ4770_PHY=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set +CONFIG_AUTOFS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_UTF8=y CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set CONFIG_TMPFS=y diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig index 55d0690a3ffe..9d75f5b77d5d 100644 --- a/arch/mips/configs/cu1000-neo_defconfig +++ b/arch/mips/configs/cu1000-neo_defconfig @@ -25,6 +25,7 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set # CONFIG_SUSPEND is not set +CONFIG_MODULES=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set CONFIG_CMA=y @@ -32,15 +33,17 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -CONFIG_CFG80211=y +CONFIG_CFG80211=m CONFIG_UEVENT_HELPER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y CONFIG_STMMAC_ETH=y CONFIG_SMSC_PHY=y -CONFIG_BRCMFMAC=y +CONFIG_BRCMFMAC=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -52,16 +55,25 @@ CONFIG_SERIAL_8250_NR_UARTS=3 CONFIG_SERIAL_8250_RUNTIME_UARTS=3 CONFIG_SERIAL_8250_INGENIC=y CONFIG_SERIAL_OF_PLATFORM=y -# CONFIG_HW_RANDOM is not set +CONFIG_SERIAL_SC16IS7XX=y +# CONFIG_SERIAL_SC16IS7XX_I2C is not set +CONFIG_SERIAL_SC16IS7XX_SPI=y CONFIG_I2C=y CONFIG_I2C_JZ4780=y +CONFIG_SPI=y +CONFIG_SPI_GPIO=y CONFIG_GPIO_SYSFS=y -CONFIG_SENSORS_ADS7828=y +CONFIG_SENSORS_ADS7828=m CONFIG_WATCHDOG=y CONFIG_JZ4740_WDT=y # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_STORAGE=y +CONFIG_USB_DWC2=y +CONFIG_USB_SERIAL=y +CONFIG_USB_SERIAL_CH341=y +CONFIG_USB_GADGET=y CONFIG_MMC=y CONFIG_MMC_JZ4740=y CONFIG_NEW_LEDS=y @@ -72,16 +84,22 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_JZ4740=y CONFIG_DMADEVICES=y CONFIG_DMA_JZ4780=y +# CONFIG_INGENIC_TIMER is not set +CONFIG_INGENIC_SYSOST=y # CONFIG_IOMMU_SUPPORT is not set +CONFIG_JZ4770_PHY=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_AUTOFS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_UTF8=y CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_NFS_FS=y CONFIG_NLS=y +CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_936=y CONFIG_NLS_CODEPAGE_950=y CONFIG_NLS_ASCII=y diff --git a/arch/mips/configs/cu1830-neo_defconfig b/arch/mips/configs/cu1830-neo_defconfig index e7064851a47a..29decd0003c6 100644 --- a/arch/mips/configs/cu1830-neo_defconfig +++ b/arch/mips/configs/cu1830-neo_defconfig @@ -25,6 +25,7 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set # CONFIG_SUSPEND is not set +CONFIG_MODULES=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set CONFIG_CMA=y @@ -32,18 +33,20 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -CONFIG_CFG80211=y +CONFIG_CFG80211=m CONFIG_UEVENT_HELPER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y # CONFIG_ALLOW_DEV_COREDUMP is not set +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_BLK_DEV_DM=y +CONFIG_BLK_DEV_MD=m +CONFIG_BLK_DEV_DM=m CONFIG_NETDEVICES=y CONFIG_STMMAC_ETH=y CONFIG_ICPLUS_PHY=y -CONFIG_BRCMFMAC=y +CONFIG_BRCMFMAC=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -55,16 +58,25 @@ CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 CONFIG_SERIAL_8250_INGENIC=y CONFIG_SERIAL_OF_PLATFORM=y -# CONFIG_HW_RANDOM is not set +CONFIG_SERIAL_SC16IS7XX=y +# CONFIG_SERIAL_SC16IS7XX_I2C is not set +CONFIG_SERIAL_SC16IS7XX_SPI=y CONFIG_I2C=y CONFIG_I2C_JZ4780=y +CONFIG_SPI=y +CONFIG_SPI_GPIO=y CONFIG_GPIO_SYSFS=y -CONFIG_SENSORS_ADS7828=y +CONFIG_SENSORS_ADS7828=m CONFIG_WATCHDOG=y CONFIG_JZ4740_WDT=y # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_STORAGE=y +CONFIG_USB_DWC2=y +CONFIG_USB_SERIAL=y +CONFIG_USB_SERIAL_CH341=y +CONFIG_USB_GADGET=y CONFIG_MMC=y CONFIG_MMC_JZ4740=y CONFIG_NEW_LEDS=y @@ -75,16 +87,22 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_JZ4740=y CONFIG_DMADEVICES=y CONFIG_DMA_JZ4780=y +# CONFIG_INGENIC_TIMER is not set +CONFIG_INGENIC_SYSOST=y # CONFIG_IOMMU_SUPPORT is not set +CONFIG_JZ4770_PHY=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_AUTOFS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_UTF8=y CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_NFS_FS=y CONFIG_NLS=y +CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_936=y CONFIG_NLS_CODEPAGE_950=y CONFIG_NLS_ASCII=y diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig index 7e28a4fe9d84..460683b52285 100644 --- a/arch/mips/configs/gcw0_defconfig +++ b/arch/mips/configs/gcw0_defconfig @@ -73,7 +73,6 @@ CONFIG_DRM_PANEL_NOVATEK_NT39016=y CONFIG_DRM_INGENIC=y CONFIG_DRM_ETNAVIV=y CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_GENERIC is not set CONFIG_BACKLIGHT_PWM=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig index 8a921c8ac233..5cb91509bb7c 100644 --- a/arch/mips/configs/gpr_defconfig +++ b/arch/mips/configs/gpr_defconfig @@ -249,7 +249,6 @@ CONFIG_SSB_DRIVER_PCICORE=y # CONFIG_VGA_ARB is not set # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_GENERIC is not set # CONFIG_VGA_CONSOLE is not set CONFIG_USB_HID=m CONFIG_USB_HIDDEV=y diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig index 3a9a453b1264..688c91918db2 100644 --- a/arch/mips/configs/lemote2f_defconfig +++ b/arch/mips/configs/lemote2f_defconfig @@ -145,7 +145,6 @@ CONFIG_FB_SIS_300=y CONFIG_FB_SIS_315=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_GENERIC=m # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 38a817ead8e7..9c5fadef38cb 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -286,7 +286,6 @@ CONFIG_DRM_VIRTIO_GPU=y CONFIG_FB_RADEON=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m -CONFIG_BACKLIGHT_GENERIC=m # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig index d1c3ff7f00bf..bbe0f39f8088 100644 --- a/arch/mips/configs/mtx1_defconfig +++ b/arch/mips/configs/mtx1_defconfig @@ -448,7 +448,6 @@ CONFIG_WDT_MTX1=y # CONFIG_VGA_ARB is not set # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_GENERIC is not set # CONFIG_VGA_CONSOLE is not set CONFIG_SOUND=m CONFIG_SND=m diff --git a/arch/mips/configs/rs90_defconfig b/arch/mips/configs/rs90_defconfig index dfbb9fed9a42..4f540bb94628 100644 --- a/arch/mips/configs/rs90_defconfig +++ b/arch/mips/configs/rs90_defconfig @@ -97,7 +97,6 @@ CONFIG_DRM_FBDEV_OVERALLOC=300 CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_INGENIC=y CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_GENERIC is not set CONFIG_BACKLIGHT_PWM=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig index 55d9aed7ced9..657dd93c5e76 100644 --- a/arch/mips/generic/Kconfig +++ b/arch/mips/generic/Kconfig @@ -31,17 +31,22 @@ comment "MSCC Ocelot doesn't work with SEAD3 enabled" depends on LEGACY_BOARD_SEAD3 config LEGACY_BOARD_OCELOT - bool "Support MSCC Ocelot boards" + bool "Legacy support for Ocelot based boards" depends on LEGACY_BOARD_SEAD3=n select LEGACY_BOARDS - select MSCC_OCELOT + select SOC_VCOREIII select SYS_HAS_EARLY_PRINTK select USE_GENERIC_EARLY_PRINTK_8250 -config MSCC_OCELOT +config SOC_VCOREIII bool select GPIOLIB select MSCC_OCELOT_IRQ + select MSCC_OCELOT #will be removed when driver no more use it + +#Will be removed when the driver using it will be converted to SOC_VCOREIII +config MSCC_OCELOT + bool comment "FIT/UHI Boards" @@ -67,12 +72,36 @@ config FIT_IMAGE_FDT_XILFPGA config FIT_IMAGE_FDT_OCELOT bool "Include FDT for Microsemi Ocelot development platforms" - select MSCC_OCELOT + select SOC_VCOREIII help Enable this to include the FDT for the Ocelot development platforms from Microsemi in the FIT kernel image. This requires u-boot on the platform. +config FIT_IMAGE_FDT_LUTON + bool "Include FDT for Microsemi Luton development platforms" + select SOC_VCOREIII + help + Enable this to include the FDT for the Luton development platforms + from Microsemi in the FIT kernel image. + This requires u-boot on the platform. + +config FIT_IMAGE_FDT_JAGUAR2 + bool "Include FDT for Microsemi Jaguar2 development platforms" + select SOC_VCOREIII + help + Enable this to include the FDT for the Jaguar2 development platforms + from Microsemi in the FIT kernel image. + This requires u-boot on the platform. + +config FIT_IMAGE_FDT_SERVAL + bool "Include FDT for Microsemi Serval development platforms" + select SOC_VCOREIII + help + Enable this to include the FDT for the Serval development platforms + from Microsemi in the FIT kernel image. + This requires u-boot on the platform. + config BOARD_INGENIC bool "Support boards based on Ingenic SoCs" select MACH_INGENIC_GENERIC diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index f8ef2f9d107e..b871af16b5b6 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -20,4 +20,7 @@ its-y := vmlinux.its.S its-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += board-boston.its.S its-$(CONFIG_FIT_IMAGE_FDT_NI169445) += board-ni169445.its.S its-$(CONFIG_FIT_IMAGE_FDT_OCELOT) += board-ocelot.its.S +its-$(CONFIG_FIT_IMAGE_FDT_LUTON) += board-luton.its.S +its-$(CONFIG_FIT_IMAGE_FDT_JAGUAR2) += board-jaguar2.its.S +its-$(CONFIG_FIT_IMAGE_FDT_SERVAL) += board-serval.its.S its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S diff --git a/arch/mips/generic/board-jaguar2.its.S b/arch/mips/generic/board-jaguar2.its.S new file mode 100644 index 000000000000..fb0e589eeff7 --- /dev/null +++ b/arch/mips/generic/board-jaguar2.its.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/ { + images { + fdt@jaguar2_pcb110 { + description = "MSCC Jaguar2 PCB110 Device Tree"; + data = /incbin/("boot/dts/mscc/jaguar2_pcb110.dtb"); + type = "flat_dt"; + arch = "mips"; + compression = "none"; + hash@0 { + algo = "sha1"; + }; + }; + fdt@jaguar2_pcb111 { + description = "MSCC Jaguar2 PCB111 Device Tree"; + data = /incbin/("boot/dts/mscc/jaguar2_pcb111.dtb"); + type = "flat_dt"; + arch = "mips"; + compression = "none"; + hash@0 { + algo = "sha1"; + }; + }; + }; + + configurations { + pcb110 { + description = "Jaguar2 Linux kernel"; + kernel = "kernel@0"; + fdt = "fdt@jaguar2_pcb110"; + ramdisk = "ramdisk"; + }; + pcb111 { + description = "Jaguar2 Linux kernel"; + kernel = "kernel@0"; + fdt = "fdt@jaguar2_pcb111"; + ramdisk = "ramdisk"; + }; + }; +}; diff --git a/arch/mips/generic/board-luton.its.S b/arch/mips/generic/board-luton.its.S new file mode 100644 index 000000000000..39a543f62f25 --- /dev/null +++ b/arch/mips/generic/board-luton.its.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/ { + images { + fdt@luton_pcb091 { + description = "MSCC Luton PCB091 Device Tree"; + data = /incbin/("boot/dts/mscc/luton_pcb091.dtb"); + type = "flat_dt"; + arch = "mips"; + compression = "none"; + hash@0 { + algo = "sha1"; + }; + }; + }; + + configurations { + pcb091 { + description = "Luton Linux kernel"; + kernel = "kernel@0"; + fdt = "fdt@luton_pcb091"; + }; + }; +}; diff --git a/arch/mips/generic/board-serval.its.S b/arch/mips/generic/board-serval.its.S new file mode 100644 index 000000000000..4ea4fc9d757f --- /dev/null +++ b/arch/mips/generic/board-serval.its.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/ { + images { + fdt@serval_pcb105 { + description = "MSCC Serval PCB105 Device Tree"; + data = /incbin/("boot/dts/mscc/serval_pcb105.dtb"); + type = "flat_dt"; + arch = "mips"; + compression = "none"; + hash@0 { + algo = "sha1"; + }; + }; + }; + + configurations { + pcb105 { + description = "Serval Linux kernel"; + kernel = "kernel@0"; + fdt = "fdt@serval_pcb105"; + ramdisk = "ramdisk"; + }; + }; +}; diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h index a2cb2d2b1c07..2b06090a78b2 100644 --- a/arch/mips/include/asm/compiler.h +++ b/arch/mips/include/asm/compiler.h @@ -43,14 +43,7 @@ #undef barrier_before_unreachable #define barrier_before_unreachable() asm volatile(".insn") -#if !defined(CONFIG_CC_IS_GCC) || \ - (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9) -# define GCC_OFF_SMALL_ASM() "ZC" -#elif defined(CONFIG_CPU_MICROMIPS) -# error "microMIPS compilation unsupported with GCC older than 4.9" -#else -# define GCC_OFF_SMALL_ASM() "R" -#endif +#define GCC_OFF_SMALL_ASM() "ZC" #ifdef CONFIG_CPU_MIPSR6 #define MIPS_ISA_LEVEL "mips64r6" diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index f2e216eef7da..336e02b3b3ce 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -115,8 +115,6 @@ #ifndef cpu_has_3k_cache #define cpu_has_3k_cache __isa_lt_and_opt(1, MIPS_CPU_3K_CACHE) #endif -#define cpu_has_6k_cache 0 -#define cpu_has_8k_cache 0 #ifndef cpu_has_4k_cache #define cpu_has_4k_cache __isa_ge_or_opt(1, MIPS_CPU_4K_CACHE) #endif diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index afc92b7a61c6..4592841b6b0c 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -228,6 +228,10 @@ struct loongson_system_configuration { extern struct efi_memory_map_loongson *loongson_memmap; extern struct loongson_system_configuration loongson_sysconf; +extern struct board_devices *eboard; +extern struct interface_info *einter; +extern struct loongson_special_attribute *especial; + extern u32 node_id_offset; extern void ls7a_early_config(void); extern void rs780e_early_config(void); diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index 87a5bfbf8cfe..e4d77f4f0fe3 100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -19,10 +19,6 @@ .macro kernel_entry_setup .set push .set mips64 - /* Set LPA on LOONGSON3 config3 */ - mfc0 t0, CP0_CONFIG3 - or t0, (0x1 << 7) - mtc0 t0, CP0_CONFIG3 /* Set ELPA on LOONGSON3 pagegrain */ mfc0 t0, CP0_PAGEGRAIN or t0, (0x1 << 29) @@ -54,10 +50,6 @@ .macro smp_slave_setup .set push .set mips64 - /* Set LPA on LOONGSON3 config3 */ - mfc0 t0, CP0_CONFIG3 - or t0, (0x1 << 7) - mtc0 t0, CP0_CONFIG3 /* Set ELPA on LOONGSON3 pagegrain */ mfc0 t0, CP0_PAGEGRAIN or t0, (0x1 << 29) diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h index 83dbb9fdf9c2..165993514762 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson_regs.h +++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h @@ -227,6 +227,16 @@ static inline void csr_writeq(u64 val, u32 reg) #define CSR_IPI_SEND_CPU_SHIFT 16 #define CSR_IPI_SEND_BLOCK BIT(31) +#define LOONGSON_CSR_MAIL_BUF0 0x1020 +#define LOONGSON_CSR_MAIL_SEND 0x1048 +#define CSR_MAIL_SEND_BLOCK BIT_ULL(31) +#define CSR_MAIL_SEND_BOX_LOW(box) (box << 1) +#define CSR_MAIL_SEND_BOX_HIGH(box) ((box << 1) + 1) +#define CSR_MAIL_SEND_BOX_SHIFT 2 +#define CSR_MAIL_SEND_CPU_SHIFT 16 +#define CSR_MAIL_SEND_BUF_SHIFT 32 +#define CSR_MAIL_SEND_H32_MASK 0xFFFFFFFF00000000ULL + static inline u64 drdtime(void) { int rID = 0; diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index e5ef0fdd4838..4f9c37616d42 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -25,14 +25,8 @@ struct mm_struct; struct vm_area_struct; -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \ - _page_cachable_default) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ - _page_cachable_default) -#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \ - _page_cachable_default) -#define PAGE_READONLY __pgprot(_PAGE_PRESENT | \ - _page_cachable_default) +#define PAGE_SHARED vm_get_page_prot(VM_READ|VM_WRITE|VM_SHARED) + #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ _PAGE_GLOBAL | _page_cachable_default) #define PAGE_KERNEL_NC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index ee26f9a4575d..e2c352da3877 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -115,6 +115,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SECCOMP 4 /* secure computing */ #define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ #define TIF_UPROBE 6 /* breakpointed or singlestepping */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ @@ -139,6 +140,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SECCOMP (1< -#include #endif /* _ASM_TYPES_H */ diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index f60af512c877..90f1c3df1f0e 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -266,6 +266,7 @@ int mips_cm_probe(void) if ((base_reg & CM_GCR_BASE_GCRBASE) != addr) { pr_err("GCRs appear to have been moved (expected them at 0x%08lx)!\n", (unsigned long)addr); + iounmap(mips_gcr_base); mips_gcr_base = NULL; return -ENODEV; } diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 3d80a51256de..47aeb3350a76 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -64,7 +64,7 @@ static void __init sync_icache(void *kbase, unsigned long kernel_length) : "r" (kbase)); kbase += step; - } while (kbase < kend); + } while (step && kbase < kend); /* Completion barrier */ __sync(); @@ -95,7 +95,7 @@ static int __init apply_r_mips_26_rel(u32 *loc_orig, u32 *loc_new, long offset) /* Original target address */ target_addr <<= 2; - target_addr += (unsigned long)loc_orig & ~0x03ffffff; + target_addr += (unsigned long)loc_orig & 0xf0000000; /* Get the new target address */ target_addr += offset; @@ -105,7 +105,7 @@ static int __init apply_r_mips_26_rel(u32 *loc_orig, u32 *loc_new, long offset) return -ENOEXEC; } - target_addr -= (unsigned long)loc_new & ~0x03ffffff; + target_addr -= (unsigned long)loc_new & 0xf0000000; target_addr >>= 2; *loc_new = (*loc_new & ~0x03ffffff) | (target_addr & 0x03ffffff); @@ -294,6 +294,13 @@ static inline int __init relocation_addr_valid(void *loc_new) return 1; } +#if defined(CONFIG_USE_OF) +void __weak *plat_get_fdt(void) +{ + return NULL; +} +#endif + void *__init relocate_kernel(void) { void *loc_new; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index ca579deef939..7e1f8e277437 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -498,8 +498,8 @@ static void __init request_crashkernel(struct resource *res) static void __init check_kernel_sections_mem(void) { - phys_addr_t start = PFN_PHYS(PFN_DOWN(__pa_symbol(&_text))); - phys_addr_t size = PFN_PHYS(PFN_UP(__pa_symbol(&_end))) - start; + phys_addr_t start = __pa_symbol(&_text); + phys_addr_t size = __pa_symbol(&_end) - start; if (!memblock_is_region_memory(start, size)) { pr_info("Kernel sections are not in the memory maps\n"); @@ -688,8 +688,6 @@ static void __init arch_mem_init(char **cmdline_p) fdt_init_reserved_mem(); - memblock_dump_all(); - early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn)); } @@ -787,6 +785,8 @@ void __init setup_arch(char **cmdline_p) cpu_cache_init(); paging_init(); + + memblock_dump_all(); } unsigned long kernelsp[NR_CPUS]; diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 50d0515bea21..f1e985109da0 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -903,7 +903,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, uprobe_notify_resume(regs); /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) { diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 1dbfb5aadffd..359b176b665f 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -362,9 +362,6 @@ static int bmips_cpu_disable(void) { unsigned int cpu = smp_processor_id(); - if (cpu == 0) - return -EBUSY; - pr_info("SMP: CPU%d is offline\n", cpu); set_cpu_online(cpu, false); diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index dbb3f1fc71ab..8b027c72b8ef 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -461,6 +462,7 @@ static int cps_cpu_disable(void) smp_mb__after_atomic(); set_cpu_online(cpu, false); calculate_cpu_foreign_map(); + irq_migrate_all_off_this_cpu(); return 0; } diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 242dc5e83847..7d0b91ad2581 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -161,7 +161,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) gic_pfn = virt_to_phys(mips_gic_base + MIPS_GIC_USER_OFS) >> PAGE_SHIFT; ret = io_remap_pfn_range(vma, base, gic_pfn, gic_size, - pgprot_noncached(PAGE_READONLY)); + pgprot_noncached(vma->vm_page_prot)); if (ret) goto out; } @@ -169,7 +169,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* Map data page. */ ret = remap_pfn_range(vma, data_addr, virt_to_phys(vdso_data) >> PAGE_SHIFT, - PAGE_SIZE, PAGE_READONLY); + PAGE_SIZE, vma->vm_page_prot); if (ret) goto out; diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 28c366d307e7..3dabeda82458 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -1074,6 +1074,7 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, { kvm_pfn_t pfn; pte_t *ptep; + pgprot_t prot; ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr); if (!ptep) { @@ -1083,7 +1084,8 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); /* Also set valid and dirty, so refill handler doesn't have to */ - *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED))); + prot = vm_get_page_prot(VM_READ|VM_WRITE|VM_SHARED); + *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, prot))); /* Invalidate this entry in the TLB, guest kernel ASID only */ kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c index 09d5deea747f..f80a67c092b6 100644 --- a/arch/mips/lib/uncached.c +++ b/arch/mips/lib/uncached.c @@ -37,10 +37,12 @@ */ unsigned long run_uncached(void *func) { - register long sp __asm__("$sp"); register long ret __asm__("$2"); long lfunc = (long)func, ufunc; long usp; + long sp; + + __asm__("move %0, $sp" : "=r" (sp)); if (sp >= (long)CKSEG0 && sp < (long)CKSEG2) usp = CKSEG1ADDR(sp); diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index 39c06f52b08f..cc76944b1a9d 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_RS780_HPET) += hpet.o obj-$(CONFIG_SUSPEND) += pm.o obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o +obj-$(CONFIG_SYSFS) += boardinfo.o diff --git a/arch/mips/loongson64/boardinfo.c b/arch/mips/loongson64/boardinfo.c new file mode 100644 index 000000000000..280989c5a137 --- /dev/null +++ b/arch/mips/loongson64/boardinfo.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +static ssize_t boardinfo_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + char board_manufacturer[64] = {0}; + char *tmp_board_manufacturer = board_manufacturer; + char bios_vendor[64] = {0}; + char *tmp_bios_vendor = bios_vendor; + + strcpy(board_manufacturer, eboard->name); + strcpy(bios_vendor, einter->description); + + return sprintf(buf, + "Board Info\n" + "Manufacturer\t\t: %s\n" + "Board Name\t\t: %s\n" + "Family\t\t\t: LOONGSON3\n\n" + "BIOS Info\n" + "Vendor\t\t\t: %s\n" + "Version\t\t\t: %s\n" + "ROM Size\t\t: %d KB\n" + "Release Date\t\t: %s\n", + strsep(&tmp_board_manufacturer, "-"), + eboard->name, + strsep(&tmp_bios_vendor, "-"), + einter->description, + einter->size, + especial->special_name); +} +static struct kobj_attribute boardinfo_attr = __ATTR(boardinfo, 0444, + boardinfo_show, NULL); + +static int __init boardinfo_init(void) +{ + struct kobject *lefi_kobj; + + lefi_kobj = kobject_create_and_add("lefi", firmware_kobj); + if (!lefi_kobj) { + pr_err("lefi: Firmware registration failed.\n"); + return -ENOMEM; + } + + return sysfs_create_file(lefi_kobj, &boardinfo_attr.attr); +} +late_initcall(boardinfo_init); diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index 134cb8e9efc2..51a5d050a94c 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -28,6 +28,10 @@ EXPORT_SYMBOL(cpu_clock_freq); struct efi_memory_map_loongson *loongson_memmap; struct loongson_system_configuration loongson_sysconf; +struct board_devices *eboard; +struct interface_info *einter; +struct loongson_special_attribute *especial; + u64 loongson_chipcfg[MAX_PACKAGES] = {0xffffffffbfc00180}; u64 loongson_chiptemp[MAX_PACKAGES]; u64 loongson_freqctrl[MAX_PACKAGES]; @@ -57,6 +61,12 @@ void __init prom_init_env(void) ((u64)loongson_p + loongson_p->system_offset); ecpu = (struct efi_cpuinfo_loongson *) ((u64)loongson_p + loongson_p->cpu_offset); + eboard = (struct board_devices *) + ((u64)loongson_p + loongson_p->boarddev_table_offset); + einter = (struct interface_info *) + ((u64)loongson_p + loongson_p->interface_offset); + especial = (struct loongson_special_attribute *) + ((u64)loongson_p + loongson_p->special_offset); eirq_source = (struct irq_source_routing_table *) ((u64)loongson_p + loongson_p->irq_offset); loongson_memmap = (struct efi_memory_map_loongson *) diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index cf9459f79f9b..c6f0c48384f8 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -35,23 +35,6 @@ EXPORT_SYMBOL(__node_data); cpumask_t __node_cpumask[MAX_NUMNODES]; EXPORT_SYMBOL(__node_cpumask); -static void enable_lpa(void) -{ - unsigned long value; - - value = __read_32bit_c0_register($16, 3); - value |= 0x00000080; - __write_32bit_c0_register($16, 3, value); - value = __read_32bit_c0_register($16, 3); - pr_info("CP0_Config3: CP0 16.3 (0x%lx)\n", value); - - value = __read_32bit_c0_register($5, 1); - value |= 0x20000000; - __write_32bit_c0_register($5, 1, value); - value = __read_32bit_c0_register($5, 1); - pr_info("CP0_PageGrain: CP0 5.1 (0x%lx)\n", value); -} - static void cpu_node_probe(void) { int i; @@ -168,6 +151,9 @@ static void __init node_mem_init(unsigned int node) NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn; if (node == 0) { + /* kernel start address */ + unsigned long kernel_start_pfn = PFN_DOWN(__pa_symbol(&_text)); + /* kernel end address */ unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end)); @@ -175,8 +161,8 @@ static void __init node_mem_init(unsigned int node) max_low_pfn = end_pfn; /* Reserve the kernel text/data/bss */ - memblock_reserve(start_pfn << PAGE_SHIFT, - ((kernel_end_pfn - start_pfn) << PAGE_SHIFT)); + memblock_reserve(kernel_start_pfn << PAGE_SHIFT, + ((kernel_end_pfn - kernel_start_pfn) << PAGE_SHIFT)); /* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */ if (node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) @@ -243,7 +229,8 @@ EXPORT_SYMBOL(pcibus_to_node); void __init prom_init_numa_memory(void) { - enable_lpa(); + pr_info("CP0_Config3: CP0 16.3 (0x%x)\n", read_c0_config3()); + pr_info("CP0_PageGrain: CP0 5.1 (0x%x)\n", read_c0_pagegrain()); prom_meminit(); } EXPORT_SYMBOL(prom_init_numa_memory); diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e744e1bee49e..b8c1fc3158fd 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -53,6 +53,29 @@ static uint32_t core0_c0count[NR_CPUS]; u32 (*ipi_read_clear)(int cpu); void (*ipi_write_action)(int cpu, u32 action); +void (*ipi_write_enable)(int cpu); +void (*ipi_clear_buf)(int cpu); +void (*ipi_write_buf)(int cpu, struct task_struct *idle); + +/* send mail via Mail_Send register for 3A4000+ CPU */ +static void csr_mail_send(uint64_t data, int cpu, int mailbox) +{ + uint64_t val; + + /* send high 32 bits */ + val = CSR_MAIL_SEND_BLOCK; + val |= (CSR_MAIL_SEND_BOX_HIGH(mailbox) << CSR_MAIL_SEND_BOX_SHIFT); + val |= (cpu << CSR_MAIL_SEND_CPU_SHIFT); + val |= (data & CSR_MAIL_SEND_H32_MASK); + csr_writeq(val, LOONGSON_CSR_MAIL_SEND); + + /* send low 32 bits */ + val = CSR_MAIL_SEND_BLOCK; + val |= (CSR_MAIL_SEND_BOX_LOW(mailbox) << CSR_MAIL_SEND_BOX_SHIFT); + val |= (cpu << CSR_MAIL_SEND_CPU_SHIFT); + val |= (data << CSR_MAIL_SEND_BUF_SHIFT); + csr_writeq(val, LOONGSON_CSR_MAIL_SEND); +}; static u32 csr_ipi_read_clear(int cpu) { @@ -79,6 +102,35 @@ static void csr_ipi_write_action(int cpu, u32 action) } } +static void csr_ipi_write_enable(int cpu) +{ + csr_writel(0xffffffff, LOONGSON_CSR_IPI_EN); +} + +static void csr_ipi_clear_buf(int cpu) +{ + csr_writeq(0, LOONGSON_CSR_MAIL_BUF0); +} + +static void csr_ipi_write_buf(int cpu, struct task_struct *idle) +{ + unsigned long startargs[4]; + + /* startargs[] are initial PC, SP and GP for secondary CPU */ + startargs[0] = (unsigned long)&smp_bootstrap; + startargs[1] = (unsigned long)__KSTK_TOS(idle); + startargs[2] = (unsigned long)task_thread_info(idle); + startargs[3] = 0; + + pr_debug("CPU#%d, func_pc=%lx, sp=%lx, gp=%lx\n", + cpu, startargs[0], startargs[1], startargs[2]); + + csr_mail_send(startargs[3], cpu_logical_map(cpu), 3); + csr_mail_send(startargs[2], cpu_logical_map(cpu), 2); + csr_mail_send(startargs[1], cpu_logical_map(cpu), 1); + csr_mail_send(startargs[0], cpu_logical_map(cpu), 0); +} + static u32 legacy_ipi_read_clear(int cpu) { u32 action; @@ -96,14 +148,53 @@ static void legacy_ipi_write_action(int cpu, u32 action) loongson3_ipi_write32((u32)action, ipi_set0_regs[cpu]); } +static void legacy_ipi_write_enable(int cpu) +{ + loongson3_ipi_write32(0xffffffff, ipi_en0_regs[cpu_logical_map(cpu)]); +} + +static void legacy_ipi_clear_buf(int cpu) +{ + loongson3_ipi_write64(0, ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); +} + +static void legacy_ipi_write_buf(int cpu, struct task_struct *idle) +{ + unsigned long startargs[4]; + + /* startargs[] are initial PC, SP and GP for secondary CPU */ + startargs[0] = (unsigned long)&smp_bootstrap; + startargs[1] = (unsigned long)__KSTK_TOS(idle); + startargs[2] = (unsigned long)task_thread_info(idle); + startargs[3] = 0; + + pr_debug("CPU#%d, func_pc=%lx, sp=%lx, gp=%lx\n", + cpu, startargs[0], startargs[1], startargs[2]); + + loongson3_ipi_write64(startargs[3], + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x18); + loongson3_ipi_write64(startargs[2], + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x10); + loongson3_ipi_write64(startargs[1], + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x8); + loongson3_ipi_write64(startargs[0], + ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); +} + static void csr_ipi_probe(void) { if (cpu_has_csr() && csr_readl(LOONGSON_CSR_FEATURES) & LOONGSON_CSRF_IPI) { ipi_read_clear = csr_ipi_read_clear; ipi_write_action = csr_ipi_write_action; + ipi_write_enable = csr_ipi_write_enable; + ipi_clear_buf = csr_ipi_clear_buf; + ipi_write_buf = csr_ipi_write_buf; } else { ipi_read_clear = legacy_ipi_read_clear; ipi_write_action = legacy_ipi_write_action; + ipi_write_enable = legacy_ipi_write_enable; + ipi_clear_buf = legacy_ipi_clear_buf; + ipi_write_buf = legacy_ipi_write_buf; } } @@ -347,9 +438,7 @@ static void loongson3_init_secondary(void) /* Set interrupt mask, but don't enable */ change_c0_status(ST0_IM, imask); - - for (i = 0; i < num_possible_cpus(); i++) - loongson3_ipi_write32(0xffffffff, ipi_en0_regs[cpu_logical_map(i)]); + ipi_write_enable(cpu); per_cpu(cpu_state, cpu) = CPU_ONLINE; cpu_set_core(&cpu_data[cpu], @@ -381,8 +470,8 @@ static void loongson3_smp_finish(void) write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); local_irq_enable(); - loongson3_ipi_write64(0, - ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); + ipi_clear_buf(cpu); + pr_info("CPU#%d finished, CP0_ST=%x\n", smp_processor_id(), read_c0_status()); } @@ -420,6 +509,8 @@ static void __init loongson3_smp_setup(void) ipi_status0_regs_init(); ipi_en0_regs_init(); ipi_mailbox_buf_init(); + ipi_write_enable(0); + cpu_set_core(&cpu_data[0], cpu_logical_map(0) % loongson_sysconf.cores_per_package); cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package; @@ -439,27 +530,10 @@ static void __init loongson3_prepare_cpus(unsigned int max_cpus) */ static int loongson3_boot_secondary(int cpu, struct task_struct *idle) { - unsigned long startargs[4]; - pr_info("Booting CPU#%d...\n", cpu); - /* startargs[] are initial PC, SP and GP for secondary CPU */ - startargs[0] = (unsigned long)&smp_bootstrap; - startargs[1] = (unsigned long)__KSTK_TOS(idle); - startargs[2] = (unsigned long)task_thread_info(idle); - startargs[3] = 0; + ipi_write_buf(cpu, idle); - pr_debug("CPU#%d, func_pc=%lx, sp=%lx, gp=%lx\n", - cpu, startargs[0], startargs[1], startargs[2]); - - loongson3_ipi_write64(startargs[3], - ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x18); - loongson3_ipi_write64(startargs[2], - ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x10); - loongson3_ipi_write64(startargs[1], - ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x8); - loongson3_ipi_write64(startargs[0], - ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0); return 0; } @@ -470,9 +544,6 @@ static int loongson3_cpu_disable(void) unsigned long flags; unsigned int cpu = smp_processor_id(); - if (cpu == 0) - return -EBUSY; - set_cpu_online(cpu, false); calculate_cpu_foreign_map(); local_irq_save(flags); @@ -690,9 +761,10 @@ static void loongson3_type3_play_dead(int *state_addr) "1: li %[count], 0x100 \n" /* wait for init loop */ "2: bnez %[count], 2b \n" /* limit mailbox access */ " addiu %[count], -1 \n" - " ld %[initfunc], 0x20(%[base]) \n" /* get PC via mailbox */ + " lw %[initfunc], 0x20(%[base]) \n" /* check lower 32-bit as jump indicator */ " beqz %[initfunc], 1b \n" " nop \n" + " ld %[initfunc], 0x20(%[base]) \n" /* get PC (whole 64-bit) via mailbox */ " ld $sp, 0x28(%[base]) \n" /* get SP via mailbox */ " ld $gp, 0x30(%[base]) \n" /* get GP via mailbox */ " ld $a1, 0x38(%[base]) \n" diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 9cede7ce37e6..4f976d687ab0 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1609,7 +1609,7 @@ static void __init loongson2_sc_init(void) c->options |= MIPS_CPU_INCLUSIVE_CACHES; } -static void __init loongson3_sc_init(void) +static void loongson3_sc_init(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config2, lsize; @@ -1623,15 +1623,13 @@ static void __init loongson3_sc_init(void) c->scache.sets = 64 << ((config2 >> 8) & 15); c->scache.ways = 1 + (config2 & 15); - scache_size = c->scache.sets * - c->scache.ways * - c->scache.linesz; - /* Loongson-3 has 4-Scache banks, while Loongson-2K have only 2 banks */ if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) - scache_size *= 2; + c->scache.sets *= 2; else - scache_size *= 4; + c->scache.sets *= 4; + + scache_size = c->scache.sets * c->scache.ways * c->scache.linesz; c->scache.waybit = 0; c->scache.waysize = scache_size / c->scache.ways; diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 3e81ba000096..23b16bfd97b2 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -155,47 +155,32 @@ void __update_cache(unsigned long address, pte_t pte) unsigned long _page_cachable_default; EXPORT_SYMBOL(_page_cachable_default); +#define PM(p) __pgprot(_page_cachable_default | (p)) + static inline void setup_protection_map(void) { - if (cpu_has_rixi) { - protection_map[0] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[1] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[2] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[3] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[5] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[6] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[7] = __pgprot(_page_cachable_default | _PAGE_PRESENT); + protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = PM(_PAGE_PRESENT); + protection_map[5] = PM(_PAGE_PRESENT); + protection_map[6] = PM(_PAGE_PRESENT); + protection_map[7] = PM(_PAGE_PRESENT); - protection_map[8] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[9] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[10] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[11] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[13] = __pgprot(_page_cachable_default | _PAGE_PRESENT); - protection_map[14] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE); - protection_map[15] = __pgprot(_page_cachable_default | _PAGE_PRESENT | _PAGE_WRITE); - - } else { - protection_map[0] = PAGE_NONE; - protection_map[1] = PAGE_READONLY; - protection_map[2] = PAGE_COPY; - protection_map[3] = PAGE_COPY; - protection_map[4] = PAGE_READONLY; - protection_map[5] = PAGE_READONLY; - protection_map[6] = PAGE_COPY; - protection_map[7] = PAGE_COPY; - protection_map[8] = PAGE_NONE; - protection_map[9] = PAGE_READONLY; - protection_map[10] = PAGE_SHARED; - protection_map[11] = PAGE_SHARED; - protection_map[12] = PAGE_READONLY; - protection_map[13] = PAGE_READONLY; - protection_map[14] = PAGE_SHARED; - protection_map[15] = PAGE_SHARED; - } + protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + _PAGE_NO_READ); + protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = PM(_PAGE_PRESENT); + protection_map[13] = PM(_PAGE_PRESENT); + protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); } +#undef PM + void cpu_cache_init(void) { if (cpu_has_3k_cache) { @@ -203,21 +188,11 @@ void cpu_cache_init(void) r3k_cache_init(); } - if (cpu_has_6k_cache) { - extern void __weak r6k_cache_init(void); - - r6k_cache_init(); - } if (cpu_has_4k_cache) { extern void __weak r4k_cache_init(void); r4k_cache_init(); } - if (cpu_has_8k_cache) { - extern void __weak r8k_cache_init(void); - - r8k_cache_init(); - } if (cpu_has_tx39_cache) { extern void __weak tx39_cache_init(void); diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 77ffece9c270..b9f76f433617 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -58,18 +58,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return (pte_t *) pmd; } -/* - * This function checks for proper alignment of input addr and len parameters. - */ -int is_aligned_hugepage_range(unsigned long addr, unsigned long len) -{ - if (len & ~HPAGE_MASK) - return -EINVAL; - if (addr & ~HPAGE_MASK) - return -EINVAL; - return 0; -} - int pmd_huge(pmd_t pmd) { return (pmd_val(pmd) & _PAGE_HUGE) != 0; diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index dd0a5becaabd..06ec304ad4d1 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -146,7 +146,7 @@ static inline int mips_sc_is_activated(struct cpuinfo_mips *c) return 1; } -static int __init mips_sc_probe_cm3(void) +static int mips_sc_probe_cm3(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned long cfg = read_gcr_l2_config(); @@ -180,7 +180,7 @@ static int __init mips_sc_probe_cm3(void) return 0; } -static inline int __init mips_sc_probe(void) +static inline int mips_sc_probe(void) { struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int config1, config2; diff --git a/arch/mips/pci/pci-vr41xx.c b/arch/mips/pci/pci-vr41xx.c index 1fa4e101470c..4f250c55b6e6 100644 --- a/arch/mips/pci/pci-vr41xx.c +++ b/arch/mips/pci/pci-vr41xx.c @@ -293,8 +293,10 @@ static int __init vr41xx_pciu_init(void) master = setup->master_io; io_map_base = ioremap(master->bus_base_address, resource_size(res)); - if (!io_map_base) + if (!io_map_base) { + iounmap(pciu_base); return -EBUSY; + } vr41xx_pci_controller.io_map_base = (unsigned long)io_map_base; } diff --git a/arch/nds32/include/asm/thread_info.h b/arch/nds32/include/asm/thread_info.h index c135111ec44e..d3967ad184f0 100644 --- a/arch/nds32/include/asm/thread_info.h +++ b/arch/nds32/include/asm/thread_info.h @@ -48,6 +48,7 @@ struct thread_info { #define TIF_NEED_RESCHED 2 #define TIF_SINGLESTEP 3 #define TIF_NOTIFY_RESUME 4 /* callback before returning to user */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 17 #define TIF_MEMDIE 18 @@ -57,6 +58,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S index 6a2966c2d8c8..b30699911b81 100644 --- a/arch/nds32/kernel/ex-exit.S +++ b/arch/nds32/kernel/ex-exit.S @@ -120,7 +120,7 @@ work_pending: andi $p1, $r1, #_TIF_NEED_RESCHED bnez $p1, work_resched - andi $p1, $r1, #_TIF_SIGPENDING|#_TIF_NOTIFY_RESUME + andi $p1, $r1, #_TIF_SIGPENDING|#_TIF_NOTIFY_RESUME|#_TIF_NOTIFY_SIGNAL beqz $p1, no_work_pending move $r0, $sp ! 'regs' diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 2acb94812af9..7e3ca430a223 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -376,7 +376,7 @@ static void do_signal(struct pt_regs *regs) asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) { - if (thread_flags & _TIF_SIGPENDING) + if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h index 7349a4fa635b..272d2c72a727 100644 --- a/arch/nios2/include/asm/thread_info.h +++ b/arch/nios2/include/asm/thread_info.h @@ -86,6 +86,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 4 /* is terminating due to OOM killer */ #define TIF_SECCOMP 5 /* secure computing */ #define TIF_SYSCALL_AUDIT 6 /* syscall auditing active */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling @@ -97,6 +98,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index cf2dca2ac7c3..2009ae2d3c3b 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -306,7 +306,8 @@ asmlinkage int do_notify_resume(struct pt_regs *regs) if (!user_mode(regs)) return 0; - if (test_thread_flag(TIF_SIGPENDING)) { + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) { int restart = do_signal(regs); if (unlikely(restart)) { diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index 9afe68bc423b..4f9d2a261455 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -98,6 +98,7 @@ register struct thread_info *current_thread_info_reg asm("r10"); #define TIF_SINGLESTEP 4 /* restore singlestep on return to user * mode */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_SYSCALL_TRACEPOINT 8 /* for ftrace syscall instrumentation */ #define TIF_RESTORE_SIGMASK 9 #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling * TIF_NEED_RESCHED @@ -109,6 +110,7 @@ register struct thread_info *current_thread_info_reg asm("r10"); #define _TIF_SIGPENDING (1< + +#ifdef CONFIG_64BIT +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_PARISC64 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "parisc64" +# ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_PARISC +# define SECCOMP_ARCH_COMPAT_NR NR_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "parisc" +# endif +#else /* !CONFIG_64BIT */ +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_PARISC +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "parisc" +#endif + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 285757544cca..0bd38a972cea 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -52,6 +52,7 @@ struct thread_info { #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ +#define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ #define TIF_SINGLESTEP 9 /* single stepping? */ @@ -61,6 +62,7 @@ struct thread_info { #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_32BIT (1 << TIF_32BIT) @@ -72,7 +74,7 @@ struct thread_info { #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ - _TIF_NEED_RESCHED) + _TIF_NEED_RESCHED | _TIF_NOTIFY_SIGNAL) #define _TIF_SYSCALL_TRACE_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \ _TIF_BLOCKSTEP | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) diff --git a/arch/parisc/include/uapi/asm/types.h b/arch/parisc/include/uapi/asm/types.h deleted file mode 100644 index 28c7d7453b10..000000000000 --- a/arch/parisc/include/uapi/asm/types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _PARISC_TYPES_H -#define _PARISC_TYPES_H - -#include - -#endif diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index f6f28e41bb5e..beba9816cc6c 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -887,7 +887,7 @@ intr_check_sig: /* As above */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 - ldi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), %r20 + ldi (_TIF_USER_WORK_MASK & ~_TIF_NEED_RESCHED), %r20 and,COND(<>) %r19, %r20, %r0 b,n intr_restore /* skip past if we've nothing to do */ @@ -1810,7 +1810,7 @@ syscall_check_resched: .import do_signal,code syscall_check_sig: LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 - ldi (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME), %r26 + ldi (_TIF_USER_WORK_MASK & ~_TIF_NEED_RESCHED), %r26 and,COND(<>) %r19, %r26, %r0 b,n syscall_restore /* skip past if we've nothing to do */ diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 36610a5c029f..36a57aa38e87 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -335,7 +335,7 @@ pcxl_free_range(unsigned long vaddr, size_t size) dump_resmap(); } -static int proc_pcxl_dma_show(struct seq_file *m, void *v) +static int __maybe_unused proc_pcxl_dma_show(struct seq_file *m, void *v) { #if 0 u_long i = 0; diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 7f2d0c0ecc80..1b6129e7d776 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -163,7 +163,6 @@ static int __init processor_probe(struct parisc_device *dev) if (cpuid) memset(p, 0, sizeof(struct cpuinfo_parisc)); - p->loops_per_jiffy = loops_per_jiffy; p->dev = dev; /* Save IODC data in case we need it */ p->hpa = dev->hpa.start; /* save CPU hpa */ p->cpuid = cpuid; /* save CPU id */ @@ -434,8 +433,8 @@ show_cpuinfo (struct seq_file *m, void *v) show_cache_info(m); seq_printf(m, "bogomips\t: %lu.%02lu\n", - cpuinfo->loops_per_jiffy / (500000 / HZ), - (cpuinfo->loops_per_jiffy / (5000 / HZ)) % 100); + loops_per_jiffy / (500000 / HZ), + loops_per_jiffy / (5000 / HZ) % 100); seq_printf(m, "software id\t: %ld\n\n", boot_cpu_data.pdc.model.sw_id); diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 9f43eaeb0b0a..fb1e94a3982b 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -603,7 +603,8 @@ do_signal(struct pt_regs *regs, long in_syscall) void do_notify_resume(struct pt_regs *regs, long in_syscall) { - if (test_thread_flag(TIF_SIGPENDING)) + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 9549496f5523..5f12537318ab 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -53,6 +53,25 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr, return base + off; } + +#define STACK_SIZE_DEFAULT (USER_WIDE_MODE \ + ? (1 << 30) /* 1 GB */ \ + : (CONFIG_STACK_MAX_DEFAULT_SIZE_MB*1024*1024)) + +unsigned long calc_max_stack_size(unsigned long stack_max) +{ +#ifdef CONFIG_COMPAT + if (!USER_WIDE_MODE && (stack_max == COMPAT_RLIM_INFINITY)) + stack_max = STACK_SIZE_DEFAULT; + else +#endif + if (stack_max == RLIM_INFINITY) + stack_max = STACK_SIZE_DEFAULT; + + return stack_max; +} + + /* * Top of mmap area (just below the process stack). */ @@ -69,8 +88,8 @@ static unsigned long mmap_upper_limit(struct rlimit *rlim_stack) /* Limit stack size - see setup_arg_pages() in fs/exec.c */ stack_base = rlim_stack ? rlim_stack->rlim_max : rlimit_max(RLIMIT_STACK); - if (stack_base > STACK_SIZE_MAX) - stack_base = STACK_SIZE_MAX; + + stack_base = calc_max_stack_size(stack_base); /* Add space for stack randomization. */ if (current->flags & PF_RANDOMIZE) diff --git a/arch/powerpc/include/asm/seccomp.h b/arch/powerpc/include/asm/seccomp.h index 51209f6071c5..ac2033f134f0 100644 --- a/arch/powerpc/include/asm/seccomp.h +++ b/arch/powerpc/include/asm/seccomp.h @@ -8,4 +8,27 @@ #include +#ifdef __LITTLE_ENDIAN__ +#define __SECCOMP_ARCH_LE __AUDIT_ARCH_LE +#define __SECCOMP_ARCH_LE_NAME "le" +#else +#define __SECCOMP_ARCH_LE 0 +#define __SECCOMP_ARCH_LE_NAME +#endif + +#ifdef CONFIG_PPC64 +# define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_PPC64 | __SECCOMP_ARCH_LE) +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "ppc64" __SECCOMP_ARCH_LE_NAME +# ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT (AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE) +# define SECCOMP_ARCH_COMPAT_NR NR_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "ppc" __SECCOMP_ARCH_LE_NAME +# endif +#else /* !CONFIG_PPC64 */ +# define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE) +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "ppc" __SECCOMP_ARCH_LE_NAME +#endif + #endif /* _ASM_POWERPC_SECCOMP_H */ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 46a210b03d2b..53115ae61495 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -90,6 +90,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */ #define TIF_SYSCALL_EMU 4 /* syscall emulation active */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_PATCH_PENDING 6 /* pending live patching update */ @@ -115,6 +116,7 @@ void arch_setup_new_exec(void); #define _TIF_SYSCALL_TRACE (1<thread.regs); do_signal(current); } diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h index bf7744ee3b3d..c7ee6a3507be 100644 --- a/arch/riscv/include/asm/seccomp.h +++ b/arch/riscv/include/asm/seccomp.h @@ -7,4 +7,14 @@ #include +#ifdef CONFIG_64BIT +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_RISCV64 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "riscv64" +#else /* !CONFIG_64BIT */ +# define SECCOMP_ARCH_NATIVE AUDIT_ARCH_RISCV32 +# define SECCOMP_ARCH_NATIVE_NR NR_syscalls +# define SECCOMP_ARCH_NATIVE_NAME "riscv32" +#endif + #endif /* _ASM_SECCOMP_H */ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index a390711129de..97bf5a1575d2 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -74,6 +74,7 @@ struct thread_info { #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ #define TIF_SECCOMP 8 /* syscall secure computing */ +#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -82,9 +83,11 @@ struct thread_info { #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_WORK_MASK \ - (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) + (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \ + _TIF_NOTIFY_SIGNAL) #define _TIF_SYSCALL_WORK \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index bc6841867b51..469aef8ed922 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -310,7 +310,7 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) { /* Handle pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index e3e2ab0acf83..778247bb1d61 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -104,6 +104,8 @@ struct ccw_device { was successfully verified. */ #define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had to be established again. */ +#define PE_PATH_FCES_EVENT 0x8 /* The FCES Status of a path has + * changed. */ /* * Possible CIO actions triggered by the unit check handler. diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 23dceb8d0453..ac02df906cae 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -373,5 +373,6 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); int chsc_stzi(void *page, void *result, size_t size); int chsc_sgib(u32 origin); +int chsc_scud(u16 cu, u64 *esm, u8 *esm_valid); #endif diff --git a/arch/s390/include/asm/seccomp.h b/arch/s390/include/asm/seccomp.h index 795bbe0d7ca6..71d46f0ba97b 100644 --- a/arch/s390/include/asm/seccomp.h +++ b/arch/s390/include/asm/seccomp.h @@ -16,4 +16,13 @@ #include +#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_S390X +#define SECCOMP_ARCH_NATIVE_NR NR_syscalls +#define SECCOMP_ARCH_NATIVE_NAME "s390x" +#ifdef CONFIG_COMPAT +# define SECCOMP_ARCH_COMPAT AUDIT_ARCH_S390 +# define SECCOMP_ARCH_COMPAT_NR NR_syscalls +# define SECCOMP_ARCH_COMPAT_NAME "s390" +#endif + #endif /* _ASM_S390_SECCOMP_H */ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index ce788f3e534d..3c5b1f909b6d 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -65,6 +65,7 @@ void arch_setup_new_exec(void); #define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ +#define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ #define TIF_ISOLATE_BP 8 /* Run process with isolated BP */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ @@ -82,6 +83,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACEPOINT 27 /* syscall tracepoint instrumentation */ #define _TIF_NOTIFY_RESUME BIT(TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL BIT(TIF_NOTIFY_SIGNAL) #define _TIF_SIGPENDING BIT(TIF_SIGPENDING) #define _TIF_NEED_RESCHED BIT(TIF_NEED_RESCHED) #define _TIF_UPROBE BIT(TIF_UPROBE) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 8bb9ebb71c4b..1f4659203f8c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,7 +52,8 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING) + _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING | \ + _TIF_NOTIFY_SIGNAL) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_FPU) @@ -486,8 +487,8 @@ ENTRY(system_call) #endif TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART jo .Lsysc_syscall_restart - TSTMSK __TI_flags(%r12),_TIF_SIGPENDING - jo .Lsysc_sigpending + TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) + jnz .Lsysc_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume j .Lsysc_return @@ -865,8 +866,8 @@ ENTRY(io_int_handler) TSTMSK __TI_flags(%r12),_TIF_PATCH_PENDING jo .Lio_patch_pending #endif - TSTMSK __TI_flags(%r12),_TIF_SIGPENDING - jo .Lio_sigpending + TSTMSK __TI_flags(%r12),(_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL) + jnz .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 9e900a8977bd..b27b6c1f058d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -472,7 +472,7 @@ void do_signal(struct pt_regs *regs) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (get_signal(&ksig)) { + if (test_thread_flag(TIF_SIGPENDING) && get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ if (current->thread.system_call) { regs->int_code = current->thread.system_call; diff --git a/arch/sh/include/asm/seccomp.h b/arch/sh/include/asm/seccomp.h index 54111e4d32b8..d4578395fd66 100644 --- a/arch/sh/include/asm/seccomp.h +++ b/arch/sh/include/asm/seccomp.h @@ -8,4 +8,14 @@ #define __NR_seccomp_exit __NR_exit #define __NR_seccomp_sigreturn __NR_rt_sigreturn +#ifdef CONFIG_CPU_LITTLE_ENDIAN +#define __SECCOMP_ARCH_LE __AUDIT_ARCH_LE +#else +#define __SECCOMP_ARCH_LE 0 +#endif + +#define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_SH | __SECCOMP_ARCH_LE) +#define SECCOMP_ARCH_NATIVE_NR NR_syscalls +#define SECCOMP_ARCH_NATIVE_NAME "sh" + #endif /* __ASM_SECCOMP_H */ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index 243ea5150aa0..598d0184ffea 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -105,6 +105,7 @@ extern void init_thread_xstate(void); #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */ #define TIF_SINGLESTEP 4 /* singlestepping active */ #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_SECCOMP 6 /* secure computing */ @@ -116,6 +117,7 @@ extern void init_thread_xstate(void); #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -132,7 +134,7 @@ extern void init_thread_xstate(void); #define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \ _TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \ _TIF_SINGLESTEP | _TIF_NOTIFY_RESUME | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOTIFY_SIGNAL) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \ diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 1add47fd31f6..dd3092911efa 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -499,7 +499,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, unsigned long thread_info_flags) { /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) + if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, save_r0); if (thread_info_flags & _TIF_NOTIFY_RESUME) diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 548b366165dd..45b4955b253f 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -104,6 +104,7 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_USEDFPU 8 /* FPU was used by this task * this quantum (SMP) */ #define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling @@ -115,11 +116,12 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define _TIF_NOTIFY_RESUME (1< #endif /* _ASM_X86_SECCOMP_H */ diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index c59c42a1221a..9718e9593564 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -7,5 +7,4 @@ generic-y += mcs_spinlock.h generic-y += param.h generic-y += qrwlock.h generic-y += qspinlock.h -generic-y += seccomp.h generic-y += user.h diff --git a/arch/xtensa/include/asm/seccomp.h b/arch/xtensa/include/asm/seccomp.h new file mode 100644 index 000000000000..f1cb6b0a9e1f --- /dev/null +++ b/arch/xtensa/include/asm/seccomp.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_SECCOMP_H +#define _ASM_SECCOMP_H + +#include + +#define SECCOMP_ARCH_NATIVE AUDIT_ARCH_XTENSA +#define SECCOMP_ARCH_NATIVE_NR NR_syscalls +#define SECCOMP_ARCH_NATIVE_NAME "xtensa" + +#endif /* _ASM_SECCOMP_H */ diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 6acbbe0d87d3..a312333a9add 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -111,18 +111,21 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ #define TIF_SYSCALL_TRACEPOINT 4 /* syscall tracepoint instrumentation */ -#define TIF_MEMDIE 5 /* is terminating due to OOM killer */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */ #define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */ #define TIF_NOTIFY_RESUME 7 /* callback before returning to user */ #define TIF_DB_DISABLED 8 /* debug trap disabled for syscall */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ #define TIF_SECCOMP 10 /* secure computing */ +#define TIF_MEMDIE 11 /* is terminating due to OOM killer */ #define _TIF_SYSCALL_TRACE (1<bi_disk, bio->bi_partno); if (part) - maxsector = part_nr_sects_read(part); - else + maxsector = bdev_nr_sectors(part); + else maxsector = get_capacity(bio->bi_disk); rcu_read_unlock(); @@ -1212,8 +1212,8 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, flush_dcache_page(dst_bv.bv_page); - bio_advance_iter(src, src_iter, bytes); - bio_advance_iter(dst, dst_iter, bytes); + bio_advance_iter_single(src, src_iter, bytes); + bio_advance_iter_single(dst, dst_iter, bytes); } } EXPORT_SYMBOL(bio_copy_data_iter); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 54fbe1e80cc4..031114d454a6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -556,22 +556,22 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg, } /** - * blkg_conf_prep - parse and prepare for per-blkg config update + * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update * @inputp: input string pointer * * Parse the device node prefix part, MAJ:MIN, of per-blkg config update - * from @input and get and return the matching gendisk. *@inputp is + * from @input and get and return the matching bdev. *@inputp is * updated to point past the device node prefix. Returns an ERR_PTR() * value on error. * * Use this function iff blkg_conf_prep() can't be used for some reason. */ -struct gendisk *blkcg_conf_get_disk(char **inputp) +struct block_device *blkcg_conf_open_bdev(char **inputp) { char *input = *inputp; unsigned int major, minor; - struct gendisk *disk; - int key_len, part; + struct block_device *bdev; + int key_len; if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) return ERR_PTR(-EINVAL); @@ -581,16 +581,16 @@ struct gendisk *blkcg_conf_get_disk(char **inputp) return ERR_PTR(-EINVAL); input = skip_spaces(input); - disk = get_gendisk(MKDEV(major, minor), &part); - if (!disk) + bdev = blkdev_get_no_open(MKDEV(major, minor)); + if (!bdev) return ERR_PTR(-ENODEV); - if (part) { - put_disk_and_module(disk); + if (bdev_is_partition(bdev)) { + blkdev_put_no_open(bdev); return ERR_PTR(-ENODEV); } *inputp = input; - return disk; + return bdev; } /** @@ -607,18 +607,18 @@ struct gendisk *blkcg_conf_get_disk(char **inputp) */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx) - __acquires(rcu) __acquires(&disk->queue->queue_lock) + __acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock) { - struct gendisk *disk; + struct block_device *bdev; struct request_queue *q; struct blkcg_gq *blkg; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - q = disk->queue; + q = bdev->bd_disk->queue; rcu_read_lock(); spin_lock_irq(&q->queue_lock); @@ -689,7 +689,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto success; } success: - ctx->disk = disk; + ctx->bdev = bdev; ctx->blkg = blkg; ctx->body = input; return 0; @@ -700,7 +700,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, spin_unlock_irq(&q->queue_lock); rcu_read_unlock(); fail: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue @@ -723,11 +723,11 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); * with blkg_conf_prep(). */ void blkg_conf_finish(struct blkg_conf_ctx *ctx) - __releases(&ctx->disk->queue->queue_lock) __releases(rcu) + __releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu) { - spin_unlock_irq(&ctx->disk->queue->queue_lock); + spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock); rcu_read_unlock(); - put_disk_and_module(ctx->disk); + blkdev_put_no_open(ctx->bdev); } EXPORT_SYMBOL_GPL(blkg_conf_finish); @@ -820,9 +820,9 @@ static void blkcg_fill_root_iostats(void) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { - struct gendisk *disk = dev_to_disk(dev); - struct hd_struct *part = disk_get_part(disk, 0); - struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue); + struct block_device *bdev = dev_to_bdev(dev); + struct blkcg_gq *blkg = + blk_queue_root_blkg(bdev->bd_disk->queue); struct blkg_iostat tmp; int cpu; @@ -830,7 +830,7 @@ static void blkcg_fill_root_iostats(void) for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; - cpu_dkstats = per_cpu_ptr(part->dkstats, cpu); + cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += cpu_dkstats->ios[STAT_READ]; tmp.ios[BLKG_IOSTAT_WRITE] += @@ -849,7 +849,6 @@ static void blkcg_fill_root_iostats(void) blkg_iostat_set(&blkg->iostat.cur, &tmp); u64_stats_update_end(&blkg->iostat.sync); } - disk_put_part(part); } } diff --git a/block/blk-core.c b/block/blk-core.c index 2db8bda43b6e..96e5fcd7f071 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -666,9 +666,9 @@ static int __init setup_fail_make_request(char *str) } __setup("fail_make_request=", setup_fail_make_request); -static bool should_fail_request(struct hd_struct *part, unsigned int bytes) +static bool should_fail_request(struct block_device *part, unsigned int bytes) { - return part->make_it_fail && should_fail(&fail_make_request, bytes); + return part->bd_make_it_fail && should_fail(&fail_make_request, bytes); } static int __init fail_make_request_debugfs(void) @@ -683,7 +683,7 @@ late_initcall(fail_make_request_debugfs); #else /* CONFIG_FAIL_MAKE_REQUEST */ -static inline bool should_fail_request(struct hd_struct *part, +static inline bool should_fail_request(struct block_device *part, unsigned int bytes) { return false; @@ -691,11 +691,11 @@ static inline bool should_fail_request(struct hd_struct *part, #endif /* CONFIG_FAIL_MAKE_REQUEST */ -static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) +static inline bool bio_check_ro(struct bio *bio, struct block_device *part) { const int op = bio_op(bio); - if (part->policy && op_is_write(op)) { + if (part->bd_read_only && op_is_write(op)) { char b[BDEVNAME_SIZE]; if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) @@ -703,7 +703,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) WARN_ONCE(1, "Trying to write to read-only block-device %s (partno %d)\n", - bio_devname(bio, b), part->partno); + bio_devname(bio, b), part->bd_partno); /* Older lvm-tools actually trigger this */ return false; } @@ -713,7 +713,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) static noinline int should_fail_bio(struct bio *bio) { - if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size)) + if (should_fail_request(bio->bi_disk->part0, bio->bi_iter.bi_size)) return -EIO; return 0; } @@ -742,7 +742,7 @@ static inline int bio_check_eod(struct bio *bio, sector_t maxsector) */ static inline int blk_partition_remap(struct bio *bio) { - struct hd_struct *p; + struct block_device *p; int ret = -EIO; rcu_read_lock(); @@ -755,11 +755,12 @@ static inline int blk_partition_remap(struct bio *bio) goto out; if (bio_sectors(bio)) { - if (bio_check_eod(bio, part_nr_sects_read(p))) + if (bio_check_eod(bio, bdev_nr_sectors(p))) goto out; - bio->bi_iter.bi_sector += p->start_sect; - trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), - bio->bi_iter.bi_sector - p->start_sect); + bio->bi_iter.bi_sector += p->bd_start_sect; + trace_block_bio_remap(bio, p->bd_dev, + bio->bi_iter.bi_sector - + p->bd_start_sect); } bio->bi_partno = 0; ret = 0; @@ -829,7 +830,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) if (unlikely(blk_partition_remap(bio))) goto end_io; } else { - if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) + if (unlikely(bio_check_ro(bio, bio->bi_disk->part0))) goto end_io; if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) goto end_io; @@ -906,7 +907,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) blkcg_bio_issue_init(bio); if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_queue(q, bio); + trace_block_bio_queue(bio); /* Now that enqueuing has been traced, we need to trace * completion as well. */ @@ -1201,7 +1202,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * return ret; if (rq->rq_disk && - should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) + should_fail_request(rq->rq_disk->part0, blk_rq_bytes(rq))) return BLK_STS_IOERR; if (blk_crypto_insert_cloned_request(rq)) @@ -1260,17 +1261,18 @@ unsigned int blk_rq_err_bytes(const struct request *rq) } EXPORT_SYMBOL_GPL(blk_rq_err_bytes); -static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) +static void update_io_ticks(struct block_device *part, unsigned long now, + bool end) { unsigned long stamp; again: - stamp = READ_ONCE(part->stamp); + stamp = READ_ONCE(part->bd_stamp); if (unlikely(stamp != now)) { - if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) + if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp)) __part_stat_add(part, io_ticks, end ? now - stamp : 1); } - if (part->partno) { - part = &part_to_disk(part)->part0; + if (part->bd_partno) { + part = bdev_whole(part); goto again; } } @@ -1279,11 +1281,9 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (req->part && blk_do_io_stat(req)) { const int sgrp = op_stat_group(req_op(req)); - struct hd_struct *part; part_stat_lock(); - part = req->part; - part_stat_add(part, sectors[sgrp], bytes >> 9); + part_stat_add(req->part, sectors[sgrp], bytes >> 9); part_stat_unlock(); } } @@ -1298,17 +1298,12 @@ void blk_account_io_done(struct request *req, u64 now) if (req->part && blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { const int sgrp = op_stat_group(req_op(req)); - struct hd_struct *part; part_stat_lock(); - part = req->part; - - update_io_ticks(part, jiffies, true); - part_stat_inc(part, ios[sgrp]); - part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); + update_io_ticks(req->part, jiffies, true); + part_stat_inc(req->part, ios[sgrp]); + part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); part_stat_unlock(); - - hd_struct_put(part); } } @@ -1324,7 +1319,7 @@ void blk_account_io_start(struct request *rq) part_stat_unlock(); } -static unsigned long __part_start_io_acct(struct hd_struct *part, +static unsigned long __part_start_io_acct(struct block_device *part, unsigned int sectors, unsigned int op) { const int sgrp = op_stat_group(op); @@ -1340,7 +1335,7 @@ static unsigned long __part_start_io_acct(struct hd_struct *part, return now; } -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, +unsigned long part_start_io_acct(struct gendisk *disk, struct block_device **part, struct bio *bio) { *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); @@ -1352,11 +1347,11 @@ EXPORT_SYMBOL_GPL(part_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(&disk->part0, sectors, op); + return __part_start_io_acct(disk->part0, sectors, op); } EXPORT_SYMBOL(disk_start_io_acct); -static void __part_end_io_acct(struct hd_struct *part, unsigned int op, +static void __part_end_io_acct(struct block_device *part, unsigned int op, unsigned long start_time) { const int sgrp = op_stat_group(op); @@ -1370,18 +1365,17 @@ static void __part_end_io_acct(struct hd_struct *part, unsigned int op, part_stat_unlock(); } -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time) { __part_end_io_acct(part, bio_op(bio), start_time); - hd_struct_put(part); } EXPORT_SYMBOL_GPL(part_end_io_acct); void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time) { - __part_end_io_acct(&disk->part0, op, start_time); + __part_end_io_acct(disk->part0, op, start_time); } EXPORT_SYMBOL(disk_end_io_acct); diff --git a/block/blk-flush.c b/block/blk-flush.c index fd5cee9f1a3b..76c1624cb06c 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -69,7 +69,6 @@ #include #include #include -#include #include "blk.h" #include "blk-mq.h" @@ -139,7 +138,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front) static void blk_account_io_flush(struct request *rq) { - struct hd_struct *part = &rq->rq_disk->part0; + struct block_device *part = rq->rq_disk->part0; part_stat_lock(); part_stat_inc(part, ios[STAT_FLUSH]); @@ -474,9 +473,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, INIT_LIST_HEAD(&fq->flush_queue[1]); INIT_LIST_HEAD(&fq->flush_data_in_flight); - lockdep_register_key(&fq->key); - lockdep_set_class(&fq->mq_flush_lock, &fq->key); - return fq; fail_rq: @@ -491,7 +487,31 @@ void blk_free_flush_queue(struct blk_flush_queue *fq) if (!fq) return; - lockdep_unregister_key(&fq->key); kfree(fq->flush_rq); kfree(fq); } + +/* + * Allow driver to set its own lock class to fq->mq_flush_lock for + * avoiding lockdep complaint. + * + * flush_end_io() may be called recursively from some driver, such as + * nvme-loop, so lockdep may complain 'possible recursive locking' because + * all 'struct blk_flush_queue' instance share same mq_flush_lock lock class + * key. We need to assign different lock class for these driver's + * fq->mq_flush_lock for avoiding the lockdep warning. + * + * Use dynamically allocated lock class key for each 'blk_flush_queue' + * instance is over-kill, and more worse it introduces horrible boot delay + * issue because synchronize_rcu() is implied in lockdep_unregister_key which + * is called for each hctx release. SCSI probing may synchronously create and + * destroy lots of MQ request_queues for non-existent devices, and some robot + * test kernel always enable lockdep option. It is observed that more than half + * an hour is taken during SCSI MQ probe with per-fq lock class. + */ +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key) +{ + lockdep_set_class(&hctx->fq->mq_flush_lock, key); +} +EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class); diff --git a/block/blk-iocost.c b/block/blk-iocost.c index bbe86d1199dc..ffa418c0dcb1 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -39,7 +39,7 @@ * On top of that, a size cost proportional to the length of the IO is * added. While simple, this model captures the operational * characteristics of a wide varienty of devices well enough. Default - * paramters for several different classes of devices are provided and the + * parameters for several different classes of devices are provided and the * parameters can be configured from userspace via * /sys/fs/cgroup/io.cost.model. * @@ -77,7 +77,7 @@ * * This constitutes the basis of IO capacity distribution. Each cgroup's * vtime is running at a rate determined by its hweight. A cgroup tracks - * the vtime consumed by past IOs and can issue a new IO iff doing so + * the vtime consumed by past IOs and can issue a new IO if doing so * wouldn't outrun the current device vtime. Otherwise, the IO is * suspended until the vtime has progressed enough to cover it. * @@ -155,7 +155,7 @@ * Instead of debugfs or other clumsy monitoring mechanisms, this * controller uses a drgn based monitoring script - * tools/cgroup/iocost_monitor.py. For details on drgn, please see - * https://github.com/osandov/drgn. The ouput looks like the following. + * https://github.com/osandov/drgn. The output looks like the following. * * sdb RUN per=300ms cur_per=234.218:v203.695 busy= +1 vrate= 62.12% * active weight hweight% inflt% dbt delay usages% @@ -370,8 +370,6 @@ enum { AUTOP_SSD_FAST, }; -struct ioc_gq; - struct ioc_params { u32 qos[NR_QOS_PARAMS]; u64 i_lcoefs[NR_I_LCOEFS]; @@ -492,7 +490,7 @@ struct ioc_gq { /* * `vtime` is this iocg's vtime cursor which progresses as IOs are * issued. If lagging behind device vtime, the delta represents - * the currently available IO budget. If runnning ahead, the + * the currently available IO budget. If running ahead, the * overage. * * `vtime_done` is the same but progressed on completion rather @@ -973,6 +971,58 @@ static void ioc_refresh_vrate(struct ioc *ioc, struct ioc_now *now) ioc->vtime_err = clamp(ioc->vtime_err, -vperiod, vperiod); } +static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct, + int nr_lagging, int nr_shortages, + int prev_busy_level, u32 *missed_ppm) +{ + u64 vrate = ioc->vtime_base_rate; + u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; + + if (!ioc->busy_level || (ioc->busy_level < 0 && nr_lagging)) { + if (ioc->busy_level != prev_busy_level || nr_lagging) + trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), + missed_ppm, rq_wait_pct, + nr_lagging, nr_shortages); + + return; + } + + /* rq_wait signal is always reliable, ignore user vrate_min */ + if (rq_wait_pct > RQ_WAIT_BUSY_PCT) + vrate_min = VRATE_MIN; + + /* + * If vrate is out of bounds, apply clamp gradually as the + * bounds can change abruptly. Otherwise, apply busy_level + * based adjustment. + */ + if (vrate < vrate_min) { + vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), 100); + vrate = min(vrate, vrate_min); + } else if (vrate > vrate_max) { + vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), 100); + vrate = max(vrate, vrate_max); + } else { + int idx = min_t(int, abs(ioc->busy_level), + ARRAY_SIZE(vrate_adj_pct) - 1); + u32 adj_pct = vrate_adj_pct[idx]; + + if (ioc->busy_level > 0) + adj_pct = 100 - adj_pct; + else + adj_pct = 100 + adj_pct; + + vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), + vrate_min, vrate_max); + } + + trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, + nr_lagging, nr_shortages); + + ioc->vtime_base_rate = vrate; + ioc_refresh_margins(ioc); +} + /* take a snapshot of the current [v]time and vrate */ static void ioc_now(struct ioc *ioc, struct ioc_now *now) { @@ -1046,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, /* * The delta between inuse and active sums indicates that - * that much of weight is being given away. Parent's inuse + * much of weight is being given away. Parent's inuse * and active should reflect the ratio. */ if (parent->child_active_sum) { @@ -2071,13 +2121,88 @@ static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors, } } +/* + * Check the active iocgs' state to avoid oversleeping and deactive + * idle iocgs. + * + * Since waiters determine the sleep durations based on the vrate + * they saw at the time of sleep, if vrate has increased, some + * waiters could be sleeping for too long. Wake up tardy waiters + * which should have woken up in the last period and expire idle + * iocgs. + */ +static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) +{ + int nr_debtors = 0; + struct ioc_gq *iocg, *tiocg; + + list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && + !iocg->delay && !iocg_is_idle(iocg)) + continue; + + spin_lock(&iocg->waitq.lock); + + /* flush wait and indebt stat deltas */ + if (iocg->wait_since) { + iocg->local_stat.wait_us += now->now - iocg->wait_since; + iocg->wait_since = now->now; + } + if (iocg->indebt_since) { + iocg->local_stat.indebt_us += + now->now - iocg->indebt_since; + iocg->indebt_since = now->now; + } + if (iocg->indelay_since) { + iocg->local_stat.indelay_us += + now->now - iocg->indelay_since; + iocg->indelay_since = now->now; + } + + if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || + iocg->delay) { + /* might be oversleeping vtime / hweight changes, kick */ + iocg_kick_waitq(iocg, true, now); + if (iocg->abs_vdebt || iocg->delay) + nr_debtors++; + } else if (iocg_is_idle(iocg)) { + /* no waiter and idle, deactivate */ + u64 vtime = atomic64_read(&iocg->vtime); + s64 excess; + + /* + * @iocg has been inactive for a full duration and will + * have a high budget. Account anything above target as + * error and throw away. On reactivation, it'll start + * with the target budget. + */ + excess = now->vnow - vtime - ioc->margins.target; + if (excess > 0) { + u32 old_hwi; + + current_hweight(iocg, NULL, &old_hwi); + ioc->vtime_err -= div64_u64(excess * old_hwi, + WEIGHT_ONE); + } + + __propagate_weights(iocg, 0, 0, false, now); + list_del_init(&iocg->active_list); + } + + spin_unlock(&iocg->waitq.lock); + } + + commit_weights(ioc); + return nr_debtors; +} + static void ioc_timer_fn(struct timer_list *timer) { struct ioc *ioc = container_of(timer, struct ioc, timer); struct ioc_gq *iocg, *tiocg; struct ioc_now now; LIST_HEAD(surpluses); - int nr_debtors = 0, nr_shortages = 0, nr_lagging = 0; + int nr_debtors, nr_shortages = 0, nr_lagging = 0; u64 usage_us_sum = 0; u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; @@ -2099,68 +2224,7 @@ static void ioc_timer_fn(struct timer_list *timer) return; } - /* - * Waiters determine the sleep durations based on the vrate they - * saw at the time of sleep. If vrate has increased, some waiters - * could be sleeping for too long. Wake up tardy waiters which - * should have woken up in the last period and expire idle iocgs. - */ - list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { - if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && - !iocg->delay && !iocg_is_idle(iocg)) - continue; - - spin_lock(&iocg->waitq.lock); - - /* flush wait and indebt stat deltas */ - if (iocg->wait_since) { - iocg->local_stat.wait_us += now.now - iocg->wait_since; - iocg->wait_since = now.now; - } - if (iocg->indebt_since) { - iocg->local_stat.indebt_us += - now.now - iocg->indebt_since; - iocg->indebt_since = now.now; - } - if (iocg->indelay_since) { - iocg->local_stat.indelay_us += - now.now - iocg->indelay_since; - iocg->indelay_since = now.now; - } - - if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || - iocg->delay) { - /* might be oversleeping vtime / hweight changes, kick */ - iocg_kick_waitq(iocg, true, &now); - if (iocg->abs_vdebt || iocg->delay) - nr_debtors++; - } else if (iocg_is_idle(iocg)) { - /* no waiter and idle, deactivate */ - u64 vtime = atomic64_read(&iocg->vtime); - s64 excess; - - /* - * @iocg has been inactive for a full duration and will - * have a high budget. Account anything above target as - * error and throw away. On reactivation, it'll start - * with the target budget. - */ - excess = now.vnow - vtime - ioc->margins.target; - if (excess > 0) { - u32 old_hwi; - - current_hweight(iocg, NULL, &old_hwi); - ioc->vtime_err -= div64_u64(excess * old_hwi, - WEIGHT_ONE); - } - - __propagate_weights(iocg, 0, 0, false, &now); - list_del_init(&iocg->active_list); - } - - spin_unlock(&iocg->waitq.lock); - } - commit_weights(ioc); + nr_debtors = ioc_check_iocgs(ioc, &now); /* * Wait and indebt stat are flushed above and the donation calculation @@ -2170,8 +2234,8 @@ static void ioc_timer_fn(struct timer_list *timer) /* calc usage and see whether some weights need to be moved around */ list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { - u64 vdone, vtime, usage_us, usage_dur; - u32 usage, hw_active, hw_inuse; + u64 vdone, vtime, usage_us; + u32 hw_active, hw_inuse; /* * Collect unused and wind vtime closer to vnow to prevent @@ -2202,30 +2266,32 @@ static void ioc_timer_fn(struct timer_list *timer) usage_us = iocg->usage_delta_us; usage_us_sum += usage_us; - if (vdone != vtime) { - u64 inflight_us = DIV64_U64_ROUND_UP( - cost_to_abs_cost(vtime - vdone, hw_inuse), - ioc->vtime_base_rate); - usage_us = max(usage_us, inflight_us); - } - - /* convert to hweight based usage ratio */ - if (time_after64(iocg->activated_at, ioc->period_at)) - usage_dur = max_t(u64, now.now - iocg->activated_at, 1); - else - usage_dur = max_t(u64, now.now - ioc->period_at, 1); - - usage = clamp_t(u32, - DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, - usage_dur), - 1, WEIGHT_ONE); - /* see whether there's surplus vtime */ WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); if (hw_inuse < hw_active || (!waitqueue_active(&iocg->waitq) && time_before64(vtime, now.vnow - ioc->margins.low))) { - u32 hwa, old_hwi, hwm, new_hwi; + u32 hwa, old_hwi, hwm, new_hwi, usage; + u64 usage_dur; + + if (vdone != vtime) { + u64 inflight_us = DIV64_U64_ROUND_UP( + cost_to_abs_cost(vtime - vdone, hw_inuse), + ioc->vtime_base_rate); + + usage_us = max(usage_us, inflight_us); + } + + /* convert to hweight based usage ratio */ + if (time_after64(iocg->activated_at, ioc->period_at)) + usage_dur = max_t(u64, now.now - iocg->activated_at, 1); + else + usage_dur = max_t(u64, now.now - ioc->period_at, 1); + + usage = clamp_t(u32, + DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, + usage_dur), + 1, WEIGHT_ONE); /* * Already donating or accumulated enough to start. @@ -2309,51 +2375,8 @@ static void ioc_timer_fn(struct timer_list *timer) ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); - if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) { - u64 vrate = ioc->vtime_base_rate; - u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; - - /* rq_wait signal is always reliable, ignore user vrate_min */ - if (rq_wait_pct > RQ_WAIT_BUSY_PCT) - vrate_min = VRATE_MIN; - - /* - * If vrate is out of bounds, apply clamp gradually as the - * bounds can change abruptly. Otherwise, apply busy_level - * based adjustment. - */ - if (vrate < vrate_min) { - vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), - 100); - vrate = min(vrate, vrate_min); - } else if (vrate > vrate_max) { - vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), - 100); - vrate = max(vrate, vrate_max); - } else { - int idx = min_t(int, abs(ioc->busy_level), - ARRAY_SIZE(vrate_adj_pct) - 1); - u32 adj_pct = vrate_adj_pct[idx]; - - if (ioc->busy_level > 0) - adj_pct = 100 - adj_pct; - else - adj_pct = 100 + adj_pct; - - vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), - vrate_min, vrate_max); - } - - trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, - nr_lagging, nr_shortages); - - ioc->vtime_base_rate = vrate; - ioc_refresh_margins(ioc); - } else if (ioc->busy_level != prev_busy_level || nr_lagging) { - trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), - missed_ppm, rq_wait_pct, nr_lagging, - nr_shortages); - } + ioc_adjust_base_vrate(ioc, rq_wait_pct, nr_lagging, nr_shortages, + prev_busy_level, missed_ppm); ioc_refresh_params(ioc, false); @@ -2400,7 +2423,7 @@ static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime, return cost; /* - * We only increase inuse during period and do so iff the margin has + * We only increase inuse during period and do so if the margin has * deteriorated since the previous adjustment. */ if (margin >= iocg->saved_margin || margin >= margins->low || @@ -3120,23 +3143,23 @@ static const match_table_t qos_tokens = { static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { - struct gendisk *disk; + struct block_device *bdev; struct ioc *ioc; u32 qos[NR_QOS_PARAMS]; bool enable, user; char *p; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); if (!ioc) { - ret = blk_iocost_init(disk->queue); + ret = blk_iocost_init(bdev->bd_disk->queue); if (ret) goto err; - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); } spin_lock_irq(&ioc->lock); @@ -3231,12 +3254,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return nbytes; einval: ret = -EINVAL; err: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return ret; } @@ -3287,23 +3310,23 @@ static const match_table_t i_lcoef_tokens = { static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { - struct gendisk *disk; + struct block_device *bdev; struct ioc *ioc; u64 u[NR_I_LCOEFS]; bool user; char *p; int ret; - disk = blkcg_conf_get_disk(&input); - if (IS_ERR(disk)) - return PTR_ERR(disk); + bdev = blkcg_conf_open_bdev(&input); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); if (!ioc) { - ret = blk_iocost_init(disk->queue); + ret = blk_iocost_init(bdev->bd_disk->queue); if (ret) goto err; - ioc = q_to_ioc(disk->queue); + ioc = q_to_ioc(bdev->bd_disk->queue); } spin_lock_irq(&ioc->lock); @@ -3356,13 +3379,13 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ioc_refresh_params(ioc, true); spin_unlock_irq(&ioc->lock); - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return nbytes; einval: ret = -EINVAL; err: - put_disk_and_module(disk); + blkdev_put_no_open(bdev); return ret; } diff --git a/block/blk-lib.c b/block/blk-lib.c index e90614fd8d6a..752f9c722062 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -65,7 +65,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* In case the discard request is in a partition */ if (bdev_is_partition(bdev)) - part_offset = bdev->bd_part->start_sect; + part_offset = bdev->bd_start_sect; while (nr_sects) { sector_t granularity_aligned_lba, req_sects; diff --git a/block/blk-merge.c b/block/blk-merge.c index 97b7c2821565..808768f6b174 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -279,6 +279,14 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, return NULL; split: *segs = nsegs; + + /* + * Bio splitting may cause subtle trouble such as hang when doing sync + * iopoll in direct IO routine. Given performance gain of iopoll for + * big IO can be trival, disable iopoll when split needed. + */ + bio->bi_opf &= ~REQ_HIPRI; + return bio_split(bio, sectors, GFP_NOIO, bs); } @@ -338,7 +346,7 @@ void __blk_queue_split(struct bio **bio, unsigned int *nr_segs) split->bi_opf |= REQ_NOMERGE; bio_chain(split, *bio); - trace_block_split(q, split, (*bio)->bi_iter.bi_sector); + trace_block_split(split, (*bio)->bi_iter.bi_sector); submit_bio_noacct(*bio); *bio = split; } @@ -683,8 +691,6 @@ static void blk_account_io_merge_request(struct request *req) part_stat_lock(); part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); part_stat_unlock(); - - hd_struct_put(req->part); } } @@ -801,7 +807,7 @@ static struct request *attempt_merge(struct request_queue *q, */ blk_account_io_merge_request(next); - trace_block_rq_merge(q, next); + trace_block_rq_merge(next); /* * ownership of bio passed from next to req, return 'next' for @@ -924,7 +930,7 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req, if (!ll_back_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; - trace_block_bio_backmerge(req->q, req, bio); + trace_block_bio_backmerge(bio); rq_qos_merge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) @@ -948,7 +954,7 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req, if (!ll_front_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; - trace_block_bio_frontmerge(req->q, req, bio); + trace_block_bio_frontmerge(bio); rq_qos_merge(req->q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index d1eafe2c045c..deff4e826e23 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -386,7 +386,7 @@ EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge); void blk_mq_sched_request_inserted(struct request *rq) { - trace_block_rq_insert(rq->q, rq); + trace_block_rq_insert(rq); } EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); diff --git a/block/blk-mq.c b/block/blk-mq.c index d35b3c0c876a..14a44699e9b6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -95,7 +95,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, } struct mq_inflight { - struct hd_struct *part; + struct block_device *part; unsigned int inflight[2]; }; @@ -105,13 +105,15 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) + if ((!mi->part->bd_partno || rq->part == mi->part) && + blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; return true; } -unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) +unsigned int blk_mq_in_flight(struct request_queue *q, + struct block_device *part) { struct mq_inflight mi = { .part = part }; @@ -120,8 +122,8 @@ unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) return mi.inflight[0] + mi.inflight[1]; } -void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]) +void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, + unsigned int inflight[2]) { struct mq_inflight mi = { .part = part }; @@ -729,7 +731,7 @@ void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; - trace_block_rq_issue(q, rq); + trace_block_rq_issue(rq); if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { rq->io_start_time_ns = ktime_get_ns(); @@ -756,7 +758,7 @@ static void __blk_mq_requeue_request(struct request *rq) blk_mq_put_driver_tag(rq); - trace_block_rq_requeue(q, rq); + trace_block_rq_requeue(rq); rq_qos_requeue(q, rq); if (blk_mq_request_started(rq)) { @@ -1590,7 +1592,7 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue. * @hctx: Pointer to the hardware queue to run. * @async: If we want to run the queue asynchronously. - * @msecs: Microseconds of delay to wait before running the queue. + * @msecs: Milliseconds of delay to wait before running the queue. * * If !@async, try to run the queue now. Else, run the queue asynchronously and * with a delay of @msecs. @@ -1619,7 +1621,7 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async, /** * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. * @hctx: Pointer to the hardware queue to run. - * @msecs: Microseconds of delay to wait before running the queue. + * @msecs: Milliseconds of delay to wait before running the queue. * * Run a hardware queue asynchronously with a delay of @msecs. */ @@ -1683,7 +1685,7 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues); /** * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously. * @q: Pointer to the request queue to run. - * @msecs: Microseconds of delay to wait before running the queues. + * @msecs: Milliseconds of delay to wait before running the queues. */ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) { @@ -1817,7 +1819,7 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, lockdep_assert_held(&ctx->lock); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); if (at_head) list_add(&rq->queuelist, &ctx->rq_lists[type]); @@ -1874,7 +1876,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, */ list_for_each_entry(rq, list, queuelist) { BUG_ON(rq->mq_ctx != ctx); - trace_block_rq_insert(hctx->queue, rq); + trace_block_rq_insert(rq); } spin_lock(&ctx->lock); @@ -2155,6 +2157,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) unsigned int nr_segs; blk_qc_t cookie; blk_status_t ret; + bool hipri; blk_queue_bounce(q, &bio); __blk_queue_split(&bio, &nr_segs); @@ -2171,6 +2174,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) rq_qos_throttle(q, bio); + hipri = bio->bi_opf & REQ_HIPRI; + data.cmd_flags = bio->bi_opf; rq = __blk_mq_alloc_request(&data); if (unlikely(!rq)) { @@ -2180,7 +2185,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) goto queue_exit; } - trace_block_getrq(q, bio, bio->bi_opf); + trace_block_getrq(bio); rq_qos_track(q, rq, bio); @@ -2263,6 +2268,8 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) blk_mq_sched_insert_request(rq, false, true, true); } + if (!hipri) + return BLK_QC_T_NONE; return cookie; queue_exit: blk_queue_exit(q); @@ -3373,6 +3380,12 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, return 0; } +static int blk_mq_alloc_tag_set_tags(struct blk_mq_tag_set *set, + int new_nr_hw_queues) +{ + return blk_mq_realloc_tag_set_tags(set, 0, new_nr_hw_queues); +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -3426,7 +3439,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) set->nr_hw_queues = nr_cpu_ids; - if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0) + if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0) return -ENOMEM; ret = -ENOMEM; @@ -3861,9 +3874,10 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin) * the state. Like for the other success return cases, the * caller is responsible for checking if the IO completed. If * the IO isn't complete, we'll get called again and will go - * straight to the busy poll loop. + * straight to the busy poll loop. If specified not to spin, + * we also should not sleep. */ - if (blk_mq_poll_hybrid(q, hctx, cookie)) + if (spin && blk_mq_poll_hybrid(q, hctx, cookie)) return 1; hctx->poll_considered++; diff --git a/block/blk-mq.h b/block/blk-mq.h index a52703c98b77..c1458d9502f1 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -99,7 +99,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue * * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue * @q: request queue * @flags: request command flags - * @cpu: cpu ctx + * @ctx: software queue cpu ctx */ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, unsigned int flags, @@ -182,9 +182,10 @@ static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx) return hctx->nr_ctx && hctx->tags; } -unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part); -void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); +unsigned int blk_mq_in_flight(struct request_queue *q, + struct block_device *part); +void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, + unsigned int inflight[2]); static inline void blk_mq_put_dispatch_budget(struct request_queue *q) { diff --git a/block/blk-settings.c b/block/blk-settings.c index 659cdb8a07fe..43990b1d148b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -157,10 +157,16 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto __func__, max_hw_sectors); } + max_hw_sectors = round_down(max_hw_sectors, + limits->logical_block_size >> SECTOR_SHIFT); limits->max_hw_sectors = max_hw_sectors; + max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS); + max_sectors = round_down(max_sectors, + limits->logical_block_size >> SECTOR_SHIFT); limits->max_sectors = max_sectors; + q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -321,13 +327,20 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); **/ void blk_queue_logical_block_size(struct request_queue *q, unsigned int size) { - q->limits.logical_block_size = size; + struct queue_limits *limits = &q->limits; - if (q->limits.physical_block_size < size) - q->limits.physical_block_size = size; + limits->logical_block_size = size; - if (q->limits.io_min < q->limits.physical_block_size) - q->limits.io_min = q->limits.physical_block_size; + if (limits->physical_block_size < size) + limits->physical_block_size = size; + + if (limits->io_min < limits->physical_block_size) + limits->io_min = limits->physical_block_size; + + limits->max_hw_sectors = + round_down(limits->max_hw_sectors, size >> SECTOR_SHIFT); + limits->max_sectors = + round_down(limits->max_sectors, size >> SECTOR_SHIFT); } EXPORT_SYMBOL(blk_queue_logical_block_size); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b771c4299982..d52cac9f3a7c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -587,6 +587,7 @@ static void throtl_pd_online(struct blkg_policy_data *pd) tg_update_has_rules(tg); } +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW static void blk_throtl_update_limit_valid(struct throtl_data *td) { struct cgroup_subsys_state *pos_css; @@ -607,6 +608,11 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td) td->limit_valid[LIMIT_LOW] = low_valid; } +#else +static inline void blk_throtl_update_limit_valid(struct throtl_data *td) +{ +} +#endif static void throtl_upgrade_state(struct throtl_data *td); static void throtl_pd_offline(struct blkg_policy_data *pd) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index fd410086fe1d..0321ca83e73f 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -835,7 +835,6 @@ int wbt_init(struct request_queue *q) rwb->enable_state = WBT_STATE_ON_DEFAULT; rwb->wc = 1; rwb->rq_depth.default_depth = RWB_DEF_DEPTH; - wbt_update_limits(rwb); /* * Assign rwb and add the stats callback. diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 6817a673e5ce..7a68b6e4300c 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -508,15 +508,29 @@ int blk_revalidate_disk_zones(struct gendisk *disk, noio_flag = memalloc_noio_save(); ret = disk->fops->report_zones(disk, 0, UINT_MAX, blk_revalidate_zone_cb, &args); + if (!ret) { + pr_warn("%s: No zones reported\n", disk->disk_name); + ret = -ENODEV; + } memalloc_noio_restore(noio_flag); + /* + * If zones where reported, make sure that the entire disk capacity + * has been checked. + */ + if (ret > 0 && args.sector != get_capacity(disk)) { + pr_warn("%s: Missing zones from sector %llu\n", + disk->disk_name, args.sector); + ret = -ENODEV; + } + /* * Install the new bitmaps and update nr_zones only once the queue is * stopped and all I/Os are completed (i.e. a scheduler is not * referencing the bitmaps). */ blk_mq_freeze_queue(q); - if (ret >= 0) { + if (ret > 0) { blk_queue_chunk_sectors(q, args.zone_sectors); q->nr_zones = args.nr_zones; swap(q->seq_zones_wlock, args.seq_zones_wlock); diff --git a/block/blk.h b/block/blk.h index dfab98465db9..7550364c326c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -25,7 +25,6 @@ struct blk_flush_queue { struct list_head flush_data_in_flight; struct request *flush_rq; - struct lock_class_key key; spinlock_t mq_flush_lock; }; @@ -91,18 +90,6 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, return __bvec_gap_to_prev(q, bprv, offset); } -static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, - unsigned int nr_segs) -{ - rq->nr_phys_segments = nr_segs; - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; - rq->ioprio = bio_prio(bio); - - if (bio->bi_disk) - rq->rq_disk = bio->bi_disk; -} - #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); @@ -215,7 +202,7 @@ static inline void elevator_exit(struct request_queue *q, __elevator_exit(q, e); } -struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); +struct block_device *__disk_get_part(struct gendisk *disk, int partno); ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); @@ -348,97 +335,21 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q); static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} #endif -struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); +struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); -int blk_alloc_devt(struct hd_struct *part, dev_t *devt); +int blk_alloc_devt(struct block_device *part, dev_t *devt); void blk_free_devt(dev_t devt); -void blk_invalidate_devt(dev_t devt); char *disk_name(struct gendisk *hd, int partno, char *buf); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -void delete_partition(struct hd_struct *part); +void delete_partition(struct block_device *part); int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int bdev_del_partition(struct block_device *bdev, int partno); int bdev_resize_partition(struct block_device *bdev, int partno, sector_t start, sector_t length); int disk_expand_part_tbl(struct gendisk *disk, int target); -int hd_ref_init(struct hd_struct *part); - -/* no need to get/put refcount of part0 */ -static inline int hd_struct_try_get(struct hd_struct *part) -{ - if (part->partno) - return percpu_ref_tryget_live(&part->ref); - return 1; -} - -static inline void hd_struct_put(struct hd_struct *part) -{ - if (part->partno) - percpu_ref_put(&part->ref); -} - -static inline void hd_free_part(struct hd_struct *part) -{ - free_percpu(part->dkstats); - kfree(part->info); - percpu_ref_exit(&part->ref); -} - -/* - * Any access of part->nr_sects which is not protected by partition - * bd_mutex or gendisk bdev bd_mutex, should be done using this - * accessor function. - * - * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption - * on. - */ -static inline sector_t part_nr_sects_read(struct hd_struct *part) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - sector_t nr_sects; - unsigned seq; - do { - seq = read_seqcount_begin(&part->nr_sects_seq); - nr_sects = part->nr_sects; - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); - return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - sector_t nr_sects; - - preempt_disable(); - nr_sects = part->nr_sects; - preempt_enable(); - return nr_sects; -#else - return part->nr_sects; -#endif -} - -/* - * Should be called with mutex lock held (typically bd_mutex) of partition - * to provide mutual exlusion among writers otherwise seqcount might be - * left in wrong state leaving the readers spinning infinitely. - */ -static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - preempt_disable(); - write_seqcount_begin(&part->nr_sects_seq); - part->nr_sects = size; - write_seqcount_end(&part->nr_sects_seq); - preempt_enable(); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - preempt_disable(); - part->nr_sects = size; - preempt_enable(); -#else - part->nr_sects = size; -#endif -} int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, diff --git a/block/bounce.c b/block/bounce.c index 162a6eee8999..d3f51acd6e3b 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -340,7 +340,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, } } - trace_block_bio_bounce(q, *bio_orig); + trace_block_bio_bounce(*bio_orig); bio->bi_flags |= (1 << BIO_BOUNCED); diff --git a/block/genhd.c b/block/genhd.c index 9387f050c248..b84b8671e627 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -26,17 +25,13 @@ #include "blk.h" -static DEFINE_MUTEX(block_class_lock); static struct kobject *block_depr; +DECLARE_RWSEM(bdev_lookup_sem); + /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) - -/* For extended devt allocation. ext_devt_lock prevents look up - * results from going away underneath its user. - */ -static DEFINE_SPINLOCK(ext_devt_lock); -static DEFINE_IDR(ext_devt_idr); +static DEFINE_IDA(ext_devt_ida); static void disk_check_events(struct disk_events *ev, unsigned int *clearing_ptr); @@ -45,30 +40,49 @@ static void disk_add_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk); static void disk_release_events(struct gendisk *disk); +void set_capacity(struct gendisk *disk, sector_t sectors) +{ + struct block_device *bdev = disk->part0; + + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} +EXPORT_SYMBOL(set_capacity); + /* - * Set disk capacity and notify if the size is not currently - * zero and will not be set to zero + * Set disk capacity and notify if the size is not currently zero and will not + * be set to zero. Returns true if a uevent was sent, otherwise false. */ -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev) +bool set_capacity_and_notify(struct gendisk *disk, sector_t size) { sector_t capacity = get_capacity(disk); + char *envp[] = { "RESIZE=1", NULL }; set_capacity(disk, size); - if (update_bdev) - revalidate_disk_size(disk, true); - if (capacity != size && capacity != 0 && size != 0) { - char *envp[] = { "RESIZE=1", NULL }; + /* + * Only print a message and send a uevent if the gendisk is user visible + * and alive. This avoids spamming the log and udev when setting the + * initial capacity during probing. + */ + if (size == capacity || + (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) + return false; - kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); - return true; - } + pr_info("%s: detected capacity change from %lld to %lld\n", + disk->disk_name, size, capacity); - return false; + /* + * Historically we did not send a uevent for changes to/from an empty + * device. + */ + if (!capacity || !size) + return false; + kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + return true; } - -EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); +EXPORT_SYMBOL_GPL(set_capacity_and_notify); /* * Format the device name of the indicated disk into the supplied buffer and @@ -92,13 +106,14 @@ const char *bdevname(struct block_device *bdev, char *buf) } EXPORT_SYMBOL(bdevname); -static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) +static void part_stat_read_all(struct block_device *part, + struct disk_stats *stat) { int cpu; memset(stat, 0, sizeof(struct disk_stats)); for_each_possible_cpu(cpu) { - struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu); + struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu); int group; for (group = 0; group < NR_STAT_GROUPS; group++) { @@ -112,7 +127,7 @@ static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) } } -static unsigned int part_in_flight(struct hd_struct *part) +static unsigned int part_in_flight(struct block_device *part) { unsigned int inflight = 0; int cpu; @@ -127,7 +142,8 @@ static unsigned int part_in_flight(struct hd_struct *part) return inflight; } -static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) +static void part_in_flight_rw(struct block_device *part, + unsigned int inflight[2]) { int cpu; @@ -143,7 +159,7 @@ static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) inflight[1] = 0; } -struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) +struct block_device *__disk_get_part(struct gendisk *disk, int partno) { struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); @@ -152,33 +168,6 @@ struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) return rcu_dereference(ptbl->part[partno]); } -/** - * disk_get_part - get partition - * @disk: disk to look partition from - * @partno: partition number - * - * Look for partition @partno from @disk. If found, increment - * reference count and return it. - * - * CONTEXT: - * Don't care. - * - * RETURNS: - * Pointer to the found partition on success, NULL if not found. - */ -struct hd_struct *disk_get_part(struct gendisk *disk, int partno) -{ - struct hd_struct *part; - - rcu_read_lock(); - part = __disk_get_part(disk, partno); - if (part) - get_device(part_to_dev(part)); - rcu_read_unlock(); - - return part; -} - /** * disk_part_iter_init - initialize partition iterator * @piter: iterator to initialize @@ -223,14 +212,13 @@ EXPORT_SYMBOL_GPL(disk_part_iter_init); * CONTEXT: * Don't care. */ -struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) +struct block_device *disk_part_iter_next(struct disk_part_iter *piter) { struct disk_part_tbl *ptbl; int inc, end; /* put the last partition */ - disk_put_part(piter->part); - piter->part = NULL; + disk_part_iter_exit(piter); /* get part_tbl */ rcu_read_lock(); @@ -251,19 +239,20 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) /* iterate to the next partition */ for (; piter->idx != end; piter->idx += inc) { - struct hd_struct *part; + struct block_device *part; part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!part_nr_sects_read(part) && + if (!bdev_nr_sectors(part) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && piter->idx == 0)) continue; - get_device(part_to_dev(part)); - piter->part = part; + piter->part = bdgrab(part); + if (!piter->part) + continue; piter->idx += inc; break; } @@ -285,15 +274,16 @@ EXPORT_SYMBOL_GPL(disk_part_iter_next); */ void disk_part_iter_exit(struct disk_part_iter *piter) { - disk_put_part(piter->part); + if (piter->part) + bdput(piter->part); piter->part = NULL; } EXPORT_SYMBOL_GPL(disk_part_iter_exit); -static inline int sector_in_part(struct hd_struct *part, sector_t sector) +static inline int sector_in_part(struct block_device *part, sector_t sector) { - return part->start_sect <= sector && - sector < part->start_sect + part_nr_sects_read(part); + return part->bd_start_sect <= sector && + sector < part->bd_start_sect + bdev_nr_sectors(part); } /** @@ -305,44 +295,34 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector) * primarily used for stats accounting. * * CONTEXT: - * RCU read locked. The returned partition pointer is always valid - * because its refcount is grabbed except for part0, which lifetime - * is same with the disk. + * RCU read locked. * * RETURNS: * Found partition on success, part0 is returned if no partition matches * or the matched partition is being deleted. */ -struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) +struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) { struct disk_part_tbl *ptbl; - struct hd_struct *part; + struct block_device *part; int i; rcu_read_lock(); ptbl = rcu_dereference(disk->part_tbl); part = rcu_dereference(ptbl->last_lookup); - if (part && sector_in_part(part, sector) && hd_struct_try_get(part)) + if (part && sector_in_part(part, sector)) goto out_unlock; for (i = 1; i < ptbl->len; i++) { part = rcu_dereference(ptbl->part[i]); - if (part && sector_in_part(part, sector)) { - /* - * only live partition can be cached for lookup, - * so use-after-free on cached & deleting partition - * can be avoided - */ - if (!hd_struct_try_get(part)) - break; rcu_assign_pointer(ptbl->last_lookup, part); goto out_unlock; } } - part = &disk->part0; + part = disk->part0; out_unlock: rcu_read_unlock(); return part; @@ -393,7 +373,9 @@ static struct blk_major_name { struct blk_major_name *next; int major; char name[16]; + void (*probe)(dev_t devt); } *major_names[BLKDEV_MAJOR_HASH_SIZE]; +static DEFINE_MUTEX(major_names_lock); /* index in the above - for now: assume no multimajor ranges */ static inline int major_to_index(unsigned major) @@ -406,20 +388,21 @@ void blkdev_show(struct seq_file *seqf, off_t offset) { struct blk_major_name *dp; - mutex_lock(&block_class_lock); + mutex_lock(&major_names_lock); for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) if (dp->major == offset) seq_printf(seqf, "%3d %s\n", dp->major, dp->name); - mutex_unlock(&block_class_lock); + mutex_unlock(&major_names_lock); } #endif /* CONFIG_PROC_FS */ /** - * register_blkdev - register a new block device + * __register_blkdev - register a new block device * * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If * @major = 0, try to allocate any unused major number. * @name: the name of the new block device as a zero terminated string + * @probe: allback that is called on access to any minor number of @major * * The @name must be unique within the system. * @@ -433,13 +416,16 @@ void blkdev_show(struct seq_file *seqf, off_t offset) * * See Documentation/admin-guide/devices.txt for the list of allocated * major numbers. + * + * Use register_blkdev instead for any new code. */ -int register_blkdev(unsigned int major, const char *name) +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)) { struct blk_major_name **n, *p; int index, ret = 0; - mutex_lock(&block_class_lock); + mutex_lock(&major_names_lock); /* temporary */ if (major == 0) { @@ -473,6 +459,7 @@ int register_blkdev(unsigned int major, const char *name) } p->major = major; + p->probe = probe; strlcpy(p->name, name, sizeof(p->name)); p->next = NULL; index = major_to_index(major); @@ -492,11 +479,10 @@ int register_blkdev(unsigned int major, const char *name) kfree(p); } out: - mutex_unlock(&block_class_lock); + mutex_unlock(&major_names_lock); return ret; } - -EXPORT_SYMBOL(register_blkdev); +EXPORT_SYMBOL(__register_blkdev); void unregister_blkdev(unsigned int major, const char *name) { @@ -504,7 +490,7 @@ void unregister_blkdev(unsigned int major, const char *name) struct blk_major_name *p = NULL; int index = major_to_index(major); - mutex_lock(&block_class_lock); + mutex_lock(&major_names_lock); for (n = &major_names[index]; *n; n = &(*n)->next) if ((*n)->major == major) break; @@ -514,14 +500,12 @@ void unregister_blkdev(unsigned int major, const char *name) p = *n; *n = p->next; } - mutex_unlock(&block_class_lock); + mutex_unlock(&major_names_lock); kfree(p); } EXPORT_SYMBOL(unregister_blkdev); -static struct kobj_map *bdev_map; - /** * blk_mangle_minor - scatter minor numbers apart * @minor: minor number to mangle @@ -555,8 +539,8 @@ static int blk_mangle_minor(int minor) } /** - * blk_alloc_devt - allocate a dev_t for a partition - * @part: partition to allocate dev_t for + * blk_alloc_devt - allocate a dev_t for a block device + * @bdev: block device to allocate dev_t for * @devt: out parameter for resulting dev_t * * Allocate a dev_t for block device. @@ -568,25 +552,18 @@ static int blk_mangle_minor(int minor) * CONTEXT: * Might sleep. */ -int blk_alloc_devt(struct hd_struct *part, dev_t *devt) +int blk_alloc_devt(struct block_device *bdev, dev_t *devt) { - struct gendisk *disk = part_to_disk(part); + struct gendisk *disk = bdev->bd_disk; int idx; /* in consecutive minor range? */ - if (part->partno < disk->minors) { - *devt = MKDEV(disk->major, disk->first_minor + part->partno); + if (bdev->bd_partno < disk->minors) { + *devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno); return 0; } - /* allocate ext devt */ - idr_preload(GFP_KERNEL); - - spin_lock_bh(&ext_devt_lock); - idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); - spin_unlock_bh(&ext_devt_lock); - - idr_preload_end(); + idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL); if (idx < 0) return idx == -ENOSPC ? -EBUSY : idx; @@ -605,26 +582,8 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) */ void blk_free_devt(dev_t devt) { - if (devt == MKDEV(0, 0)) - return; - - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock_bh(&ext_devt_lock); - idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - spin_unlock_bh(&ext_devt_lock); - } -} - -/* - * We invalidate devt by assigning NULL pointer for devt in idr. - */ -void blk_invalidate_devt(dev_t devt) -{ - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - spin_lock_bh(&ext_devt_lock); - idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt))); - spin_unlock_bh(&ext_devt_lock); - } + if (MAJOR(devt) == BLOCK_EXT_MAJOR) + ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt))); } static char *bdevt_str(dev_t devt, char *buf) @@ -639,43 +598,6 @@ static char *bdevt_str(dev_t devt, char *buf) return buf; } -/* - * Register device numbers dev..(dev+range-1) - * range must be nonzero - * The hash chain is sorted on range, so that subranges can override. - */ -void blk_register_region(dev_t devt, unsigned long range, struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), void *data) -{ - kobj_map(bdev_map, devt, range, module, probe, lock, data); -} - -EXPORT_SYMBOL(blk_register_region); - -void blk_unregister_region(dev_t devt, unsigned long range) -{ - kobj_unmap(bdev_map, devt, range); -} - -EXPORT_SYMBOL(blk_unregister_region); - -static struct kobject *exact_match(dev_t devt, int *partno, void *data) -{ - struct gendisk *p = data; - - return &disk_to_dev(p)->kobj; -} - -static int exact_lock(dev_t devt, void *data) -{ - struct gendisk *p = data; - - if (!get_disk_and_module(p)) - return -1; - return 0; -} - static void disk_scan_partitions(struct gendisk *disk) { struct block_device *bdev; @@ -694,7 +616,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, { struct device *ddev = disk_to_dev(disk); struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; int err; ddev->parent = parent; @@ -726,7 +648,8 @@ static void register_disk(struct device *parent, struct gendisk *disk, */ pm_runtime_set_memalloc_noio(ddev, true); - disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); + disk->part0->bd_holder_dir = + kobject_create_and_add("holders", &ddev->kobj); disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (disk->flags & GENHD_FL_HIDDEN) { @@ -743,7 +666,7 @@ static void register_disk(struct device *parent, struct gendisk *disk, /* announce possible partitions */ disk_part_iter_init(&piter, disk, 0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); + kobject_uevent(bdev_kobj(part), KOBJ_ADD); disk_part_iter_exit(&piter); if (disk->queue->backing_dev_info->dev) { @@ -792,7 +715,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, disk->flags |= GENHD_FL_UP; - retval = blk_alloc_devt(&disk->part0, &devt); + retval = blk_alloc_devt(disk->part0, &devt); if (retval) { WARN_ON(1); return; @@ -819,8 +742,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk, ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt)); WARN_ON(ret); bdi_set_owner(bdi, dev); - blk_register_region(disk_devt(disk), disk->minors, NULL, - exact_match, exact_lock, disk); + bdev_add(disk->part0, devt); } register_disk(parent, disk, groups); if (register_queue) @@ -850,23 +772,16 @@ void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk) } EXPORT_SYMBOL(device_add_disk_no_queue_reg); -static void invalidate_partition(struct gendisk *disk, int partno) +static void invalidate_partition(struct block_device *bdev) { - struct block_device *bdev; - - bdev = bdget_disk(disk, partno); - if (!bdev) - return; - fsync_bdev(bdev); __invalidate_device(bdev, true); /* - * Unhash the bdev inode for this device so that it gets evicted as soon - * as last inode reference is dropped. + * Unhash the bdev inode for this device so that it can't be looked + * up any more even if openers still hold references to it. */ remove_inode_hash(bdev->bd_inode); - bdput(bdev); } /** @@ -891,10 +806,13 @@ static void invalidate_partition(struct gendisk *disk, int partno) void del_gendisk(struct gendisk *disk) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; might_sleep(); + if (WARN_ON_ONCE(!disk->queue)) + return; + blk_integrity_del(disk); disk_del_events(disk); @@ -902,50 +820,39 @@ void del_gendisk(struct gendisk *disk) * Block lookups of the disk until all bdevs are unhashed and the * disk is marked as dead (GENHD_FL_UP cleared). */ - down_write(&disk->lookup_sem); + down_write(&bdev_lookup_sem); + /* invalidate stuff */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); while ((part = disk_part_iter_next(&piter))) { - invalidate_partition(disk, part->partno); + invalidate_partition(part); delete_partition(part); } disk_part_iter_exit(&piter); - invalidate_partition(disk, 0); + invalidate_partition(disk->part0); set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; - up_write(&disk->lookup_sem); + up_write(&bdev_lookup_sem); - if (!(disk->flags & GENHD_FL_HIDDEN)) + if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); - if (disk->queue) { + /* * Unregister bdi before releasing device numbers (as they can * get reused and we'd get clashes in sysfs). */ - if (!(disk->flags & GENHD_FL_HIDDEN)) - bdi_unregister(disk->queue->backing_dev_info); - blk_unregister_queue(disk); - } else { - WARN_ON(1); + bdi_unregister(disk->queue->backing_dev_info); } - if (!(disk->flags & GENHD_FL_HIDDEN)) - blk_unregister_region(disk_devt(disk), disk->minors); - /* - * Remove gendisk pointer from idr so that it cannot be looked up - * while RCU period before freeing gendisk is running to prevent - * use-after-free issues. Note that the device number stays - * "in-use" until we really free the gendisk. - */ - blk_invalidate_devt(disk_devt(disk)); + blk_unregister_queue(disk); - kobject_put(disk->part0.holder_dir); + kobject_put(disk->part0->bd_holder_dir); kobject_put(disk->slave_dir); - part_stat_set_all(&disk->part0, 0); - disk->part0.stamp = 0; + part_stat_set_all(disk->part0, 0); + disk->part0->bd_stamp = 0; if (!sysfs_deprecated) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); @@ -978,57 +885,24 @@ static ssize_t disk_badblocks_store(struct device *dev, return badblocks_store(disk->bb, page, len, 0); } -/** - * get_gendisk - get partitioning information for a given device - * @devt: device to get partitioning information for - * @partno: returned partition index - * - * This function gets the structure containing partitioning - * information for the given device @devt. - * - * Context: can sleep - */ -struct gendisk *get_gendisk(dev_t devt, int *partno) +void blk_request_module(dev_t devt) { - struct gendisk *disk = NULL; + unsigned int major = MAJOR(devt); + struct blk_major_name **n; - might_sleep(); - - if (MAJOR(devt) != BLOCK_EXT_MAJOR) { - struct kobject *kobj; - - kobj = kobj_lookup(bdev_map, devt, partno); - if (kobj) - disk = dev_to_disk(kobj_to_dev(kobj)); - } else { - struct hd_struct *part; - - spin_lock_bh(&ext_devt_lock); - part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - if (part && get_disk_and_module(part_to_disk(part))) { - *partno = part->partno; - disk = part_to_disk(part); + mutex_lock(&major_names_lock); + for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) { + if ((*n)->major == major && (*n)->probe) { + (*n)->probe(devt); + mutex_unlock(&major_names_lock); + return; } - spin_unlock_bh(&ext_devt_lock); } + mutex_unlock(&major_names_lock); - if (!disk) - return NULL; - - /* - * Synchronize with del_gendisk() to not return disk that is being - * destroyed. - */ - down_read(&disk->lookup_sem); - if (unlikely((disk->flags & GENHD_FL_HIDDEN) || - !(disk->flags & GENHD_FL_UP))) { - up_read(&disk->lookup_sem); - put_disk_and_module(disk); - disk = NULL; - } else { - up_read(&disk->lookup_sem); - } - return disk; + if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) + /* Make old-style 2.4 aliases work */ + request_module("block-major-%d", MAJOR(devt)); } /** @@ -1046,17 +920,16 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) */ struct block_device *bdget_disk(struct gendisk *disk, int partno) { - struct hd_struct *part; struct block_device *bdev = NULL; - part = disk_get_part(disk, partno); - if (part) - bdev = bdget_part(part); - disk_put_part(part); + rcu_read_lock(); + bdev = __disk_get_part(disk, partno); + if (bdev && !bdgrab(bdev)) + bdev = NULL; + rcu_read_unlock(); return bdev; } -EXPORT_SYMBOL(bdget_disk); /* * print a full list of all partitions - intended for places where the root @@ -1072,7 +945,7 @@ void __init printk_all_partitions(void) while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; char name_buf[BDEVNAME_SIZE]; char devt_buf[BDEVT_SIZE]; @@ -1091,13 +964,14 @@ void __init printk_all_partitions(void) */ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) { - bool is_part0 = part == &disk->part0; + bool is_part0 = part == disk->part0; printk("%s%s %10llu %s %s", is_part0 ? "" : " ", - bdevt_str(part_devt(part), devt_buf), - (unsigned long long)part_nr_sects_read(part) >> 1 - , disk_name(disk, part->partno, name_buf), - part->info ? part->info->uuid : ""); + bdevt_str(part->bd_dev, devt_buf), + bdev_nr_sectors(part) >> 1, + disk_name(disk, part->bd_partno, name_buf), + part->bd_meta_info ? + part->bd_meta_info->uuid : ""); if (is_part0) { if (dev->parent && dev->parent->driver) printk(" driver: %s\n", @@ -1173,7 +1047,7 @@ static int show_partition(struct seq_file *seqf, void *v) { struct gendisk *sgp = v; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ @@ -1187,9 +1061,9 @@ static int show_partition(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", - MAJOR(part_devt(part)), MINOR(part_devt(part)), - (unsigned long long)part_nr_sects_read(part) >> 1, - disk_name(sgp, part->partno, buf)); + MAJOR(part->bd_dev), MINOR(part->bd_dev), + bdev_nr_sectors(part) >> 1, + disk_name(sgp, part->bd_partno, buf)); disk_part_iter_exit(&piter); return 0; @@ -1203,15 +1077,6 @@ static const struct seq_operations partitions_op = { }; #endif - -static struct kobject *base_probe(dev_t devt, int *partno, void *data) -{ - if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) - /* Make old-style 2.4 aliases work */ - request_module("block-major-%d", MAJOR(devt)); - return NULL; -} - static int __init genhd_device_init(void) { int error; @@ -1220,7 +1085,6 @@ static int __init genhd_device_init(void) error = class_register(&block_class); if (unlikely(error)) return error; - bdev_map = kobj_map_init(base_probe, &block_class_lock); blk_dev_init(); register_blkdev(BLOCK_EXT_MAJOR, "blkext"); @@ -1278,25 +1142,22 @@ static ssize_t disk_ro_show(struct device *dev, ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%llu\n", - (unsigned long long)part_nr_sects_read(p)); + return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev))); } ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev->bd_disk->queue; struct disk_stats stat; unsigned int inflight; - part_stat_read_all(p, &stat); + part_stat_read_all(bdev, &stat); if (queue_is_mq(q)) - inflight = blk_mq_in_flight(q, p); + inflight = blk_mq_in_flight(q, bdev); else - inflight = part_in_flight(p); + inflight = part_in_flight(bdev); return sprintf(buf, "%8lu %8lu %8llu %8u " @@ -1331,14 +1192,14 @@ ssize_t part_stat_show(struct device *dev, ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - struct request_queue *q = part_to_disk(p)->queue; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev->bd_disk->queue; unsigned int inflight[2]; if (queue_is_mq(q)) - blk_mq_in_flight_rw(q, p, inflight); + blk_mq_in_flight_rw(q, bdev, inflight); else - part_in_flight_rw(p, inflight); + part_in_flight_rw(bdev, inflight); return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); } @@ -1386,20 +1247,17 @@ static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%d\n", p->make_it_fail); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail); } ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct hd_struct *p = dev_to_part(dev); int i; if (count > 0 && sscanf(buf, "%d", &i) > 0) - p->make_it_fail = (i == 0) ? 0 : 1; + dev_to_bdev(dev)->bd_make_it_fail = i; return count; } @@ -1538,11 +1396,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno) * * This function releases all allocated resources of the gendisk. * - * The struct gendisk refcount is incremented with get_gendisk() or - * get_disk_and_module(), and its refcount is decremented with - * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this - * function is called. - * * Drivers which used __device_add_disk() have a gendisk with a request_queue * assigned. Since the request_queue sits on top of the gendisk for these * drivers we also call blk_put_queue() for them, and we expect the @@ -1561,7 +1414,7 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); disk_replace_part_tbl(disk, NULL); - hd_free_part(&disk->part0); + bdput(disk->part0); if (disk->queue) blk_put_queue(disk->queue); kfree(disk); @@ -1599,7 +1452,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) { struct gendisk *gp = v; struct disk_part_iter piter; - struct hd_struct *hd; + struct block_device *hd; char buf[BDEVNAME_SIZE]; unsigned int inflight; struct disk_stats stat; @@ -1627,8 +1480,8 @@ static int diskstats_show(struct seq_file *seqf, void *v) "%lu %lu %lu %u " "%lu %u" "\n", - MAJOR(part_devt(hd)), MINOR(part_devt(hd)), - disk_name(gp, hd->partno, buf), + MAJOR(hd->bd_dev), MINOR(hd->bd_dev), + disk_name(gp, hd->bd_partno, buf), stat.ios[STAT_READ], stat.merges[STAT_READ], stat.sectors[STAT_READ], @@ -1686,7 +1539,7 @@ dev_t blk_lookup_devt(const char *name, int partno) class_dev_iter_init(&iter, &block_class, NULL, &disk_type); while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); - struct hd_struct *part; + struct block_device *part; if (strcmp(dev_name(dev), name)) continue; @@ -1699,13 +1552,12 @@ dev_t blk_lookup_devt(const char *name, int partno) MINOR(dev->devt) + partno); break; } - part = disk_get_part(disk, partno); + part = bdget_disk(disk, partno); if (part) { - devt = part_devt(part); - disk_put_part(part); + devt = part->bd_dev; + bdput(part); break; } - disk_put_part(part); } class_dev_iter_exit(&iter); return devt; @@ -1727,32 +1579,16 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) if (!disk) return NULL; - disk->part0.dkstats = alloc_percpu(struct disk_stats); - if (!disk->part0.dkstats) + disk->part0 = bdev_alloc(disk, 0); + if (!disk->part0) goto out_free_disk; - init_rwsem(&disk->lookup_sem); disk->node_id = node_id; - if (disk_expand_part_tbl(disk, 0)) { - free_percpu(disk->part0.dkstats); - goto out_free_disk; - } + if (disk_expand_part_tbl(disk, 0)) + goto out_bdput; ptbl = rcu_dereference_protected(disk->part_tbl, 1); - rcu_assign_pointer(ptbl->part[0], &disk->part0); - - /* - * set_capacity() and get_capacity() currently don't use - * seqcounter to read/update the part0->nr_sects. Still init - * the counter as we can read the sectors in IO submission - * patch using seqence counters. - * - * TODO: Ideally set_capacity() and get_capacity() should be - * converted to make use of bd_mutex and sequence counters. - */ - hd_sects_seq_init(&disk->part0); - if (hd_ref_init(&disk->part0)) - goto out_free_part0; + rcu_assign_pointer(ptbl->part[0], disk->part0); disk->minors = minors; rand_initialize_disk(disk); @@ -1761,43 +1597,14 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) device_initialize(disk_to_dev(disk)); return disk; -out_free_part0: - hd_free_part(&disk->part0); +out_bdput: + bdput(disk->part0); out_free_disk: kfree(disk); return NULL; } EXPORT_SYMBOL(__alloc_disk_node); -/** - * get_disk_and_module - increments the gendisk and gendisk fops module refcount - * @disk: the struct gendisk to increment the refcount for - * - * This increments the refcount for the struct gendisk, and the gendisk's - * fops module owner. - * - * Context: Any context. - */ -struct kobject *get_disk_and_module(struct gendisk *disk) -{ - struct module *owner; - struct kobject *kobj; - - if (!disk->fops) - return NULL; - owner = disk->fops->owner; - if (owner && !try_module_get(owner)) - return NULL; - kobj = kobject_get_unless_zero(&disk_to_dev(disk)->kobj); - if (kobj == NULL) { - module_put(owner); - return NULL; - } - return kobj; - -} -EXPORT_SYMBOL(get_disk_and_module); - /** * put_disk - decrements the gendisk refcount * @disk: the struct gendisk to decrement the refcount for @@ -1811,31 +1618,10 @@ EXPORT_SYMBOL(get_disk_and_module); void put_disk(struct gendisk *disk) { if (disk) - kobject_put(&disk_to_dev(disk)->kobj); + put_device(disk_to_dev(disk)); } EXPORT_SYMBOL(put_disk); -/** - * put_disk_and_module - decrements the module and gendisk refcount - * @disk: the struct gendisk to decrement the refcount for - * - * This is a counterpart of get_disk_and_module() and thus also of - * get_gendisk(). - * - * Context: Any context, but the last reference must not be dropped from - * atomic context. - */ -void put_disk_and_module(struct gendisk *disk) -{ - if (disk) { - struct module *owner = disk->fops->owner; - - put_disk(disk); - module_put(owner); - } -} -EXPORT_SYMBOL(put_disk_and_module); - static void set_disk_ro_uevent(struct gendisk *gd, int ro) { char event[] = "DISK_RO=1"; @@ -1846,26 +1632,19 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro) kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); } -void set_device_ro(struct block_device *bdev, int flag) -{ - bdev->bd_part->policy = flag; -} - -EXPORT_SYMBOL(set_device_ro); - void set_disk_ro(struct gendisk *disk, int flag) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; - if (disk->part0.policy != flag) { + if (disk->part0->bd_read_only != flag) { set_disk_ro_uevent(disk, flag); - disk->part0.policy = flag; + disk->part0->bd_read_only = flag; } disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) - part->policy = flag; + part->bd_read_only = flag; disk_part_iter_exit(&piter); } @@ -1875,7 +1654,7 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - return bdev->bd_part->policy; + return bdev->bd_read_only; } EXPORT_SYMBOL(bdev_read_only); diff --git a/block/ioctl.c b/block/ioctl.c index 3fbc382eb926..d61d652078f4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -35,15 +35,6 @@ static int blkpg_do_ioctl(struct block_device *bdev, start = p.start >> SECTOR_SHIFT; length = p.length >> SECTOR_SHIFT; - /* check for fit in a hd_struct */ - if (sizeof(sector_t) < sizeof(long long)) { - long pstart = start, plength = length; - - if (pstart != start || plength != length || pstart < 0 || - plength < 0 || p.pno > 65535) - return -EINVAL; - } - switch (op) { case BLKPG_ADD_PARTITION: /* check if partition is aligned to blocksize */ @@ -219,23 +210,6 @@ static int compat_put_ulong(compat_ulong_t __user *argp, compat_ulong_t val) } #endif -int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, - unsigned cmd, unsigned long arg) -{ - struct gendisk *disk = bdev->bd_disk; - - if (disk->fops->ioctl) - return disk->fops->ioctl(bdev, mode, cmd, arg); - - return -ENOTTY; -} -/* - * For the record: _GPL here is only because somebody decided to slap it - * on the previous export. Sheer idiocy, since it wasn't copyrightable - * at all and could be open-coded without any exports by anybody who cares. - */ -EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); - #ifdef CONFIG_COMPAT /* * This is the equivalent of compat_ptr_ioctl(), to be used by block @@ -346,38 +320,11 @@ static int blkdev_pr_clear(struct block_device *bdev, return ops->pr_clear(bdev, c.key); } -/* - * Is it an unrecognized ioctl? The correct returns are either - * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a - * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl - * code before returning. - * - * Confused drivers sometimes return EINVAL, which is wrong. It - * means "I understood the ioctl command, but the parameters to - * it were wrong". - * - * We should aim to just fix the broken drivers, the EINVAL case - * should go away. - */ -static inline int is_unrecognized_ioctl(int ret) -{ - return ret == -EINVAL || - ret == -ENOTTY || - ret == -ENOIOCTLCMD; -} - static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; - fsync_bdev(bdev); invalidate_bdev(bdev); return 0; @@ -391,12 +338,14 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, if (!capable(CAP_SYS_ADMIN)) return -EACCES; - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - if (!is_unrecognized_ioctl(ret)) - return ret; if (get_user(n, (int __user *)arg)) return -EFAULT; - set_device_ro(bdev, n); + if (bdev->bd_disk->fops->set_read_only) { + ret = bdev->bd_disk->fops->set_read_only(bdev, n); + if (ret) + return ret; + } + bdev->bd_read_only = n; return 0; } @@ -619,10 +568,12 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, } ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); - if (ret == -ENOIOCTLCMD) - return __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (ret != -ENOIOCTLCMD) + return ret; - return ret; + if (!bdev->bd_disk->fops->ioctl) + return -ENOTTY; + return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); } EXPORT_SYMBOL_GPL(blkdev_ioctl); /* for /dev/raw */ @@ -639,8 +590,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) { int ret; void __user *argp = compat_ptr(arg); - struct inode *inode = file->f_mapping->host; - struct block_device *bdev = inode->i_bdev; + struct block_device *bdev = I_BDEV(file->f_mapping->host); struct gendisk *disk = bdev->bd_disk; fmode_t mode = file->f_mode; loff_t size; diff --git a/block/partitions/core.c b/block/partitions/core.c index a02e22411594..deca253583bd 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -85,6 +85,13 @@ static int (*check_part[])(struct parsed_partitions *) = { NULL }; +static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) +{ + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} + static struct parsed_partitions *allocate_partitions(struct gendisk *hd) { struct parsed_partitions *state; @@ -175,44 +182,39 @@ static struct parsed_partitions *check_partition(struct gendisk *hd, static ssize_t part_partition_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%d\n", p->partno); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_partno); } static ssize_t part_start_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - - return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); + return sprintf(buf, "%llu\n", dev_to_bdev(dev)->bd_start_sect); } static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->policy ? 1 : 0); + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_read_only); } static ssize_t part_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = dev_to_bdev(dev); return sprintf(buf, "%u\n", - queue_limit_alignment_offset(&part_to_disk(p)->queue->limits, - p->start_sect)); + queue_limit_alignment_offset(&bdev->bd_disk->queue->limits, + bdev->bd_start_sect)); } static ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct hd_struct *p = dev_to_part(dev); + struct block_device *bdev = dev_to_bdev(dev); return sprintf(buf, "%u\n", - queue_limit_discard_alignment(&part_to_disk(p)->queue->limits, - p->start_sect)); + queue_limit_discard_alignment(&bdev->bd_disk->queue->limits, + bdev->bd_start_sect)); } static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); @@ -257,19 +259,17 @@ static const struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { - struct hd_struct *p = dev_to_part(dev); blk_free_devt(dev->devt); - hd_free_part(p); - kfree(p); + bdput(dev_to_bdev(dev)); } static int part_uevent(struct device *dev, struct kobj_uevent_env *env) { - struct hd_struct *part = dev_to_part(dev); + struct block_device *part = dev_to_bdev(dev); - add_uevent_var(env, "PARTN=%u", part->partno); - if (part->info && part->info->volname[0]) - add_uevent_var(env, "PARTNAME=%s", part->info->volname); + add_uevent_var(env, "PARTN=%u", part->bd_partno); + if (part->bd_meta_info && part->bd_meta_info->volname[0]) + add_uevent_var(env, "PARTNAME=%s", part->bd_meta_info->volname); return 0; } @@ -280,73 +280,29 @@ struct device_type part_type = { .uevent = part_uevent, }; -static void hd_struct_free_work(struct work_struct *work) -{ - struct hd_struct *part = - container_of(to_rcu_work(work), struct hd_struct, rcu_work); - struct gendisk *disk = part_to_disk(part); - - /* - * Release the disk reference acquired in delete_partition here. - * We can't release it in hd_struct_free because the final put_device - * needs process context and thus can't be run directly from a - * percpu_ref ->release handler. - */ - put_device(disk_to_dev(disk)); - - part->start_sect = 0; - part->nr_sects = 0; - part_stat_set_all(part, 0); - put_device(part_to_dev(part)); -} - -static void hd_struct_free(struct percpu_ref *ref) -{ - struct hd_struct *part = container_of(ref, struct hd_struct, ref); - struct gendisk *disk = part_to_disk(part); - struct disk_part_tbl *ptbl = - rcu_dereference_protected(disk->part_tbl, 1); - - rcu_assign_pointer(ptbl->last_lookup, NULL); - - INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work); - queue_rcu_work(system_wq, &part->rcu_work); -} - -int hd_ref_init(struct hd_struct *part) -{ - if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL)) - return -ENOMEM; - return 0; -} - /* * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -void delete_partition(struct hd_struct *part) +void delete_partition(struct block_device *part) { - struct gendisk *disk = part_to_disk(part); + struct gendisk *disk = part->bd_disk; struct disk_part_tbl *ptbl = rcu_dereference_protected(disk->part_tbl, 1); - /* - * ->part_tbl is referenced in this part's release handler, so - * we have to hold the disk device - */ - get_device(disk_to_dev(disk)); - rcu_assign_pointer(ptbl->part[part->partno], NULL); - kobject_put(part->holder_dir); - device_del(part_to_dev(part)); + rcu_assign_pointer(ptbl->part[part->bd_partno], NULL); + rcu_assign_pointer(ptbl->last_lookup, NULL); + + kobject_put(part->bd_holder_dir); + device_del(&part->bd_device); /* - * Remove gendisk pointer from idr so that it cannot be looked up - * while RCU period before freeing gendisk is running to prevent - * use-after-free issues. Note that the device number stays - * "in-use" until we really free the gendisk. + * Remove the block device from the inode hash, so that it cannot be + * looked up any more even when openers still hold references. */ - blk_invalidate_devt(part_devt(part)); - percpu_ref_kill(&part->ref); + remove_inode_hash(part->bd_inode); + + put_device(&part->bd_device); } static ssize_t whole_disk_show(struct device *dev, @@ -360,14 +316,14 @@ static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); * Must be called either with bd_mutex held, before a disk can be opened or * after all disk users are gone. */ -static struct hd_struct *add_partition(struct gendisk *disk, int partno, +static struct block_device *add_partition(struct gendisk *disk, int partno, sector_t start, sector_t len, int flags, struct partition_meta_info *info) { - struct hd_struct *p; dev_t devt = MKDEV(0, 0); struct device *ddev = disk_to_dev(disk); struct device *pdev; + struct block_device *bdev; struct disk_part_tbl *ptbl; const char *dname; int err; @@ -398,36 +354,22 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, if (ptbl->part[partno]) return ERR_PTR(-EBUSY); - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return ERR_PTR(-EBUSY); + bdev = bdev_alloc(disk, partno); + if (!bdev) + return ERR_PTR(-ENOMEM); - p->dkstats = alloc_percpu(struct disk_stats); - if (!p->dkstats) { - err = -ENOMEM; - goto out_free; - } - - hd_sects_seq_init(p); - pdev = part_to_dev(p); - - p->start_sect = start; - p->nr_sects = len; - p->partno = partno; - p->policy = get_disk_ro(disk); + bdev->bd_start_sect = start; + bdev_set_nr_sectors(bdev, len); + bdev->bd_read_only = get_disk_ro(disk); if (info) { - struct partition_meta_info *pinfo; - - pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id); - if (!pinfo) { - err = -ENOMEM; - goto out_free_stats; - } - memcpy(pinfo, info, sizeof(*info)); - p->info = pinfo; + err = -ENOMEM; + bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL); + if (!bdev->bd_meta_info) + goto out_bdput; } + pdev = &bdev->bd_device; dname = dev_name(ddev); if (isdigit(dname[strlen(dname) - 1])) dev_set_name(pdev, "%sp%d", dname, partno); @@ -439,9 +381,9 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, pdev->type = &part_type; pdev->parent = ddev; - err = blk_alloc_devt(p, &devt); + err = blk_alloc_devt(bdev, &devt); if (err) - goto out_free_info; + goto out_bdput; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ @@ -451,8 +393,8 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_put; err = -ENOMEM; - p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); - if (!p->holder_dir) + bdev->bd_holder_dir = kobject_create_and_add("holders", &pdev->kobj); + if (!bdev->bd_holder_dir) goto out_del; dev_set_uevent_suppress(pdev, 0); @@ -462,32 +404,20 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_del; } - err = hd_ref_init(p); - if (err) { - if (flags & ADDPART_FLAG_WHOLEDISK) - goto out_remove_file; - goto out_del; - } - /* everything is up and running, commence */ - rcu_assign_pointer(ptbl->part[partno], p); + bdev_add(bdev, devt); + rcu_assign_pointer(ptbl->part[partno], bdev); /* suppress uevent if the disk suppresses it */ if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); - return p; + return bdev; -out_free_info: - kfree(p->info); -out_free_stats: - free_percpu(p->dkstats); -out_free: - kfree(p); +out_bdput: + bdput(bdev); return ERR_PTR(err); -out_remove_file: - device_remove_file(pdev, &dev_attr_whole_disk); out_del: - kobject_put(p->holder_dir); + kobject_put(bdev->bd_holder_dir); device_del(pdev); out_put: put_device(pdev); @@ -498,14 +428,14 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, sector_t length, int skip_partno) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; bool overlap = false; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) { - if (part->partno == skip_partno || - start >= part->start_sect + part->nr_sects || - start + length <= part->start_sect) + if (part->bd_partno == skip_partno || + start >= part->bd_start_sect + bdev_nr_sectors(part) || + start + length <= part->bd_start_sect) continue; overlap = true; break; @@ -518,7 +448,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, int bdev_add_partition(struct block_device *bdev, int partno, sector_t start, sector_t length) { - struct hd_struct *part; + struct block_device *part; mutex_lock(&bdev->bd_mutex); if (partition_overlaps(bdev->bd_disk, start, length, -1)) { @@ -534,77 +464,59 @@ int bdev_add_partition(struct block_device *bdev, int partno, int bdev_del_partition(struct block_device *bdev, int partno) { - struct block_device *bdevp; - struct hd_struct *part = NULL; + struct block_device *part; int ret; - bdevp = bdget_disk(bdev->bd_disk, partno); - if (!bdevp) + part = bdget_disk(bdev->bd_disk, partno); + if (!part) return -ENXIO; - mutex_lock(&bdevp->bd_mutex); + mutex_lock(&part->bd_mutex); mutex_lock_nested(&bdev->bd_mutex, 1); - ret = -ENXIO; - part = disk_get_part(bdev->bd_disk, partno); - if (!part) - goto out_unlock; - ret = -EBUSY; - if (bdevp->bd_openers) + if (part->bd_openers) goto out_unlock; - sync_blockdev(bdevp); - invalidate_bdev(bdevp); + sync_blockdev(part); + invalidate_bdev(part); delete_partition(part); ret = 0; out_unlock: mutex_unlock(&bdev->bd_mutex); - mutex_unlock(&bdevp->bd_mutex); - bdput(bdevp); - if (part) - disk_put_part(part); + mutex_unlock(&part->bd_mutex); + bdput(part); return ret; } int bdev_resize_partition(struct block_device *bdev, int partno, sector_t start, sector_t length) { - struct block_device *bdevp; - struct hd_struct *part; + struct block_device *part; int ret = 0; - part = disk_get_part(bdev->bd_disk, partno); + part = bdget_disk(bdev->bd_disk, partno); if (!part) return -ENXIO; - ret = -ENOMEM; - bdevp = bdget_part(part); - if (!bdevp) - goto out_put_part; - - mutex_lock(&bdevp->bd_mutex); + mutex_lock(&part->bd_mutex); mutex_lock_nested(&bdev->bd_mutex, 1); - ret = -EINVAL; - if (start != part->start_sect) + if (start != part->bd_start_sect) goto out_unlock; ret = -EBUSY; if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, length); - bd_set_nr_sectors(bdevp, length); + bdev_set_nr_sectors(part, length); ret = 0; out_unlock: - mutex_unlock(&bdevp->bd_mutex); + mutex_unlock(&part->bd_mutex); mutex_unlock(&bdev->bd_mutex); - bdput(bdevp); -out_put_part: - disk_put_part(part); + bdput(part); return ret; } @@ -627,7 +539,7 @@ static bool disk_unlock_native_capacity(struct gendisk *disk) int blk_drop_partitions(struct block_device *bdev) { struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; if (bdev->bd_part_count) return -EBUSY; @@ -652,7 +564,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; - struct hd_struct *part; + struct block_device *part; if (!size) return true; @@ -692,7 +604,7 @@ static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, if (IS_BUILTIN(CONFIG_BLK_DEV_MD) && (state->parts[p].flags & ADDPART_FLAG_RAID)) - md_autodetect_dev(part_to_dev(part)->devt); + md_autodetect_dev(part->bd_dev); return true; } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ecceaaa1a66f..262326973ee0 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -16,13 +16,7 @@ menuconfig BLK_DEV if BLK_DEV -config BLK_DEV_NULL_BLK - tristate "Null test block driver" - select CONFIGFS_FS - -config BLK_DEV_NULL_BLK_FAULT_INJECTION - bool "Support fault injection for Null test block driver" - depends on BLK_DEV_NULL_BLK && FAULT_INJECTION +source "drivers/block/null_blk/Kconfig" config BLK_DEV_FD tristate "Normal floppy disk support" diff --git a/drivers/block/Makefile b/drivers/block/Makefile index e1f63117ee94..a3170859e01d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,12 +41,7 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ -obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o -null_blk-objs := null_blk_main.o -ifeq ($(CONFIG_BLK_DEV_ZONED), y) -null_blk-$(CONFIG_TRACING) += null_blk_trace.o -endif -null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 71c2b1564558..9e2d0c6a3877 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -201,7 +201,7 @@ struct amiga_floppy_struct { int busy; /* true when drive is active */ int dirty; /* true when trackbuf is not on disk */ int status; /* current error code for unit */ - struct gendisk *gendisk; + struct gendisk *gendisk[2]; struct blk_mq_tag_set tag_set; }; @@ -1669,6 +1669,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) return -EBUSY; } + if (unit[drive].type->code == FD_NODRIVE) { + mutex_unlock(&amiflop_mutex); + return -ENXIO; + } + if (mode & (FMODE_READ|FMODE_WRITE)) { bdev_check_media_change(bdev); if (mode & FMODE_WRITE) { @@ -1695,7 +1700,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) unit[drive].dtype=&data_types[system]; unit[drive].blocks=unit[drive].type->heads*unit[drive].type->tracks* data_types[system].sects*unit[drive].type->sect_mult; - set_capacity(unit[drive].gendisk, unit[drive].blocks); + set_capacity(unit[drive].gendisk[system], unit[drive].blocks); printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive, unit[drive].type->name, data_types[system].name); @@ -1772,36 +1777,68 @@ static const struct blk_mq_ops amiflop_mq_ops = { .queue_rq = amiflop_queue_rq, }; -static struct gendisk *fd_alloc_disk(int drive) +static int fd_alloc_disk(int drive, int system) { struct gendisk *disk; disk = alloc_disk(1); if (!disk) goto out; - - disk->queue = blk_mq_init_sq_queue(&unit[drive].tag_set, &amiflop_mq_ops, - 2, BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disk->queue)) { - disk->queue = NULL; + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); + if (IS_ERR(disk->queue)) goto out_put_disk; - } - unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); - if (!unit[drive].trackbuf) - goto out_cleanup_queue; + disk->major = FLOPPY_MAJOR; + disk->first_minor = drive + system; + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + if (system) + sprintf(disk->disk_name, "fd%d_msdos", drive); + else + sprintf(disk->disk_name, "fd%d", drive); + disk->private_data = &unit[drive]; + set_capacity(disk, 880 * 2); - return disk; + unit[drive].gendisk[system] = disk; + add_disk(disk); + return 0; -out_cleanup_queue: - blk_cleanup_queue(disk->queue); - disk->queue = NULL; - blk_mq_free_tag_set(&unit[drive].tag_set); out_put_disk: + disk->queue = NULL; put_disk(disk); +out: + return -ENOMEM; +} + +static int fd_alloc_drive(int drive) +{ + unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); + if (!unit[drive].trackbuf) + goto out; + + memset(&unit[drive].tag_set, 0, sizeof(unit[drive].tag_set)); + unit[drive].tag_set.ops = &amiflop_mq_ops; + unit[drive].tag_set.nr_hw_queues = 1; + unit[drive].tag_set.nr_maps = 1; + unit[drive].tag_set.queue_depth = 2; + unit[drive].tag_set.numa_node = NUMA_NO_NODE; + unit[drive].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + if (blk_mq_alloc_tag_set(&unit[drive].tag_set)) + goto out_cleanup_trackbuf; + + pr_cont(" fd%d", drive); + + if (fd_alloc_disk(drive, 0) || fd_alloc_disk(drive, 1)) + goto out_cleanup_tagset; + return 0; + +out_cleanup_tagset: + blk_mq_free_tag_set(&unit[drive].tag_set); +out_cleanup_trackbuf: + kfree(unit[drive].trackbuf); out: unit[drive].type->code = FD_NODRIVE; - return NULL; + return -ENOMEM; } static int __init fd_probe_drives(void) @@ -1812,29 +1849,16 @@ static int __init fd_probe_drives(void) drives=0; nomem=0; for(drive=0;drivecode == FD_NODRIVE) continue; - disk = fd_alloc_disk(drive); - if (!disk) { + if (fd_alloc_drive(drive) < 0) { pr_cont(" no mem for fd%d", drive); nomem = 1; continue; } - unit[drive].gendisk = disk; drives++; - - pr_cont(" fd%d",drive); - disk->major = FLOPPY_MAJOR; - disk->first_minor = drive; - disk->fops = &floppy_fops; - disk->events = DISK_EVENT_MEDIA_CHANGE; - sprintf(disk->disk_name, "fd%d", drive); - disk->private_data = &unit[drive]; - set_capacity(disk, 880*2); - add_disk(disk); } if ((drives > 0) || (nomem == 0)) { if (drives == 0) @@ -1846,15 +1870,6 @@ static int __init fd_probe_drives(void) return -ENOMEM; } -static struct kobject *floppy_find(dev_t dev, int *part, void *data) -{ - int drive = *part & 3; - if (unit[drive].type->code == FD_NODRIVE) - return NULL; - *part = 0; - return get_disk_and_module(unit[drive].gendisk); -} - static int __init amiga_floppy_probe(struct platform_device *pdev) { int i, ret; @@ -1884,9 +1899,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev) if (fd_probe_drives() < 1) /* No usable drives */ goto out_probe; - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - /* initialize variables */ timer_setup(&motor_on_timer, motor_on_callback, 0); motor_on_timer.expires = 0; diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 313f0b946fe2..ac720bdcd983 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -890,19 +890,13 @@ void aoecmd_sleepwork(struct work_struct *work) { struct aoedev *d = container_of(work, struct aoedev, work); - struct block_device *bd; - u64 ssize; if (d->flags & DEVFL_GDALLOC) aoeblk_gdalloc(d); if (d->flags & DEVFL_NEWSIZE) { - ssize = get_capacity(d->gd); - bd = bdget_disk(d->gd, 0); - if (bd) { - bd_set_nr_sectors(bd, ssize); - bdput(bd); - } + set_capacity_and_notify(d->gd, d->ssize); + spin_lock_irq(&d->lock); d->flags |= DEVFL_UP; d->flags &= ~DEVFL_NEWSIZE; @@ -971,10 +965,9 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) d->geo.start = 0; if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; - if (d->gd != NULL) { - set_capacity(d->gd, ssize); + if (d->gd != NULL) d->flags |= DEVFL_NEWSIZE; - } else + else d->flags |= DEVFL_GDALLOC; schedule_work(&d->work); } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 3e881fdb06e0..104b713f4055 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -297,7 +297,7 @@ static struct atari_floppy_struct { unsigned int wpstat; /* current state of WP signal (for disk change detection) */ int flags; /* flags */ - struct gendisk *disk; + struct gendisk *disk[NUM_DISK_MINORS]; int ref; int type; struct blk_mq_tag_set tag_set; @@ -723,12 +723,16 @@ static void fd_error( void ) static int do_format(int drive, int type, struct atari_format_descr *desc) { - struct request_queue *q = unit[drive].disk->queue; + struct request_queue *q; unsigned char *p; int sect, nsect; unsigned long flags; int ret; + if (type) + type--; + + q = unit[drive].disk[type]->queue; blk_mq_freeze_queue(q); blk_mq_quiesce_queue(q); @@ -738,7 +742,7 @@ static int do_format(int drive, int type, struct atari_format_descr *desc) local_irq_restore(flags); if (type) { - if (--type >= NUM_DISK_MINORS || + if (type >= NUM_DISK_MINORS || minor2disktype[type].drive_types > DriveType) { ret = -EINVAL; goto out; @@ -1154,7 +1158,7 @@ static void fd_rwsec_done1(int status) if (SUDT[-1].blocks > ReqBlock) { /* try another disk type */ SUDT--; - set_capacity(unit[SelectedDrive].disk, + set_capacity(unit[SelectedDrive].disk[0], SUDT->blocks); } else Probing = 0; @@ -1169,7 +1173,7 @@ static void fd_rwsec_done1(int status) /* record not found, but not probing. Maybe stretch wrong ? Restart probing */ if (SUD.autoprobe) { SUDT = atari_disk_type + StartDiskType[DriveType]; - set_capacity(unit[SelectedDrive].disk, + set_capacity(unit[SelectedDrive].disk[0], SUDT->blocks); Probing = 1; } @@ -1515,7 +1519,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, if (!UDT) { Probing = 1; UDT = atari_disk_type + StartDiskType[DriveType]; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(bd->rq->rq_disk, UDT->blocks); UD.autoprobe = 1; } } @@ -1533,7 +1537,7 @@ static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx, } type = minor2disktype[type].index; UDT = &atari_disk_type[type]; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(bd->rq->rq_disk, UDT->blocks); UD.autoprobe = 0; } @@ -1658,7 +1662,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, printk (KERN_INFO "floppy%d: setting %s %p!\n", drive, dtp->name, dtp); UDT = dtp; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(disk, UDT->blocks); if (cmd == FDDEFPRM) { /* save settings as permanent default type */ @@ -1702,7 +1706,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, return -EINVAL; UDT = dtp; - set_capacity(floppy->disk, UDT->blocks); + set_capacity(disk, UDT->blocks); return 0; case FDMSGON: @@ -1725,7 +1729,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, UDT = NULL; /* MSch: invalidate default_params */ default_params[drive].blocks = 0; - set_capacity(floppy->disk, MAX_DISK_SIZE * 2); + set_capacity(disk, MAX_DISK_SIZE * 2); fallthrough; case FDFMTEND: case FDFLUSH: @@ -1962,14 +1966,50 @@ static const struct blk_mq_ops ataflop_mq_ops = { .commit_rqs = ataflop_commit_rqs, }; -static struct kobject *floppy_find(dev_t dev, int *part, void *data) +static int ataflop_alloc_disk(unsigned int drive, unsigned int type) { - int drive = *part & 3; - int type = *part >> 2; + struct gendisk *disk; + int ret; + + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; + + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); + if (IS_ERR(disk->queue)) { + ret = PTR_ERR(disk->queue); + disk->queue = NULL; + put_disk(disk); + return ret; + } + + disk->major = FLOPPY_MAJOR; + disk->first_minor = drive + (type << 2); + sprintf(disk->disk_name, "fd%d", drive); + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + disk->private_data = &unit[drive]; + set_capacity(disk, MAX_DISK_SIZE * 2); + + unit[drive].disk[type] = disk; + return 0; +} + +static DEFINE_MUTEX(ataflop_probe_lock); + +static void ataflop_probe(dev_t dev) +{ + int drive = MINOR(dev) & 3; + int type = MINOR(dev) >> 2; + if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS) - return NULL; - *part = 0; - return get_disk_and_module(unit[drive].disk); + return; + mutex_lock(&ataflop_probe_lock); + if (!unit[drive].disk[type]) { + if (ataflop_alloc_disk(drive, type) == 0) + add_disk(unit[drive].disk[type]); + } + mutex_unlock(&ataflop_probe_lock); } static int __init atari_floppy_init (void) @@ -1981,23 +2021,26 @@ static int __init atari_floppy_init (void) /* Amiga, Mac, ... don't have Atari-compatible floppy :-) */ return -ENODEV; - if (register_blkdev(FLOPPY_MAJOR,"fd")) - return -EBUSY; + mutex_lock(&ataflop_probe_lock); + ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe); + if (ret) + goto out_unlock; for (i = 0; i < FD_MAX_UNITS; i++) { - unit[i].disk = alloc_disk(1); - if (!unit[i].disk) { - ret = -ENOMEM; + memset(&unit[i].tag_set, 0, sizeof(unit[i].tag_set)); + unit[i].tag_set.ops = &ataflop_mq_ops; + unit[i].tag_set.nr_hw_queues = 1; + unit[i].tag_set.nr_maps = 1; + unit[i].tag_set.queue_depth = 2; + unit[i].tag_set.numa_node = NUMA_NO_NODE; + unit[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ret = blk_mq_alloc_tag_set(&unit[i].tag_set); + if (ret) goto err; - } - unit[i].disk->queue = blk_mq_init_sq_queue(&unit[i].tag_set, - &ataflop_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(unit[i].disk->queue)) { - put_disk(unit[i].disk); - ret = PTR_ERR(unit[i].disk->queue); - unit[i].disk->queue = NULL; + ret = ataflop_alloc_disk(i, 0); + if (ret) { + blk_mq_free_tag_set(&unit[i].tag_set); goto err; } } @@ -2027,19 +2070,9 @@ static int __init atari_floppy_init (void) for (i = 0; i < FD_MAX_UNITS; i++) { unit[i].track = -1; unit[i].flags = 0; - unit[i].disk->major = FLOPPY_MAJOR; - unit[i].disk->first_minor = i; - sprintf(unit[i].disk->disk_name, "fd%d", i); - unit[i].disk->fops = &floppy_fops; - unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE; - unit[i].disk->private_data = &unit[i]; - set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); - add_disk(unit[i].disk); + add_disk(unit[i].disk[0]); } - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n", DriveType == 0 ? 'D' : DriveType == 1 ? 'H' : 'E', UseTrackbuffer ? "" : "no "); @@ -2049,14 +2082,14 @@ static int __init atari_floppy_init (void) err: while (--i >= 0) { - struct gendisk *disk = unit[i].disk; - - blk_cleanup_queue(disk->queue); + blk_cleanup_queue(unit[i].disk[0]->queue); + put_disk(unit[i].disk[0]); blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); +out_unlock: + mutex_unlock(&ataflop_probe_lock); return ret; } @@ -2101,13 +2134,17 @@ __setup("floppy=", atari_floppy_setup); static void __exit atari_floppy_exit(void) { - int i; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); + int i, type; + for (i = 0; i < FD_MAX_UNITS; i++) { - del_gendisk(unit[i].disk); - blk_cleanup_queue(unit[i].disk->queue); + for (type = 0; type < NUM_DISK_MINORS; type++) { + if (!unit[i].disk[type]) + continue; + del_gendisk(unit[i].disk[type]); + blk_cleanup_queue(unit[i].disk[type]->queue); + put_disk(unit[i].disk[type]); + } blk_mq_free_tag_set(&unit[i].tag_set); - put_disk(unit[i].disk); } unregister_blkdev(FLOPPY_MAJOR, "fd"); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index cc49a921339f..c43a6ab4b1f3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -426,14 +426,15 @@ static void brd_free(struct brd_device *brd) kfree(brd); } -static struct brd_device *brd_init_one(int i, bool *new) +static void brd_probe(dev_t dev) { struct brd_device *brd; + int i = MINOR(dev) / max_part; - *new = false; + mutex_lock(&brd_devices_mutex); list_for_each_entry(brd, &brd_devices, brd_list) { if (brd->brd_number == i) - goto out; + goto out_unlock; } brd = brd_alloc(i); @@ -442,9 +443,9 @@ static struct brd_device *brd_init_one(int i, bool *new) add_disk(brd->brd_disk); list_add_tail(&brd->brd_list, &brd_devices); } - *new = true; -out: - return brd; + +out_unlock: + mutex_unlock(&brd_devices_mutex); } static void brd_del_one(struct brd_device *brd) @@ -454,23 +455,6 @@ static void brd_del_one(struct brd_device *brd) brd_free(brd); } -static struct kobject *brd_probe(dev_t dev, int *part, void *data) -{ - struct brd_device *brd; - struct kobject *kobj; - bool new; - - mutex_lock(&brd_devices_mutex); - brd = brd_init_one(MINOR(dev) / max_part, &new); - kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL; - mutex_unlock(&brd_devices_mutex); - - if (new) - *part = 0; - - return kobj; -} - static inline void brd_check_and_reset_par(void) { if (unlikely(!max_part)) @@ -510,11 +494,12 @@ static int __init brd_init(void) * dynamically. */ - if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) + if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) return -EIO; brd_check_and_reset_par(); + mutex_lock(&brd_devices_mutex); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); if (!brd) @@ -532,9 +517,7 @@ static int __init brd_init(void) brd->brd_disk->queue = brd->brd_queue; add_disk(brd->brd_disk); } - - blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, - THIS_MODULE, brd_probe, NULL, NULL); + mutex_unlock(&brd_devices_mutex); pr_info("brd: module loaded\n"); return 0; @@ -544,6 +527,7 @@ static int __init brd_init(void) list_del(&brd->brd_list); brd_free(brd); } + mutex_unlock(&brd_devices_mutex); unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); pr_info("brd: module NOT loaded !!!\n"); @@ -557,7 +541,6 @@ static void __exit brd_exit(void) list_for_each_entry_safe(brd, next, &brd_devices, brd_list) brd_del_one(brd); - blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS); unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); pr_info("brd: module unloaded\n"); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 65b95aef8dbc..1c8c18b2a25f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2036,8 +2036,7 @@ void drbd_set_my_capacity(struct drbd_device *device, sector_t size) { char ppb[10]; - set_capacity(device->vdisk, size); - revalidate_disk_size(device->vdisk, false); + set_capacity_and_notify(device->vdisk, size); drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1), (unsigned long long)size>>1); @@ -2068,8 +2067,7 @@ void drbd_device_cleanup(struct drbd_device *device) } D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); - set_capacity(device->vdisk, 0); - revalidate_disk_size(device->vdisk, false); + set_capacity_and_notify(device->vdisk, 0); if (device->bitmap) { /* maybe never allocated. */ drbd_bm_resize(device, 0, 1); diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index dc333dbe5232..09c86ef3f0fd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2802,7 +2802,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) if (c_min_rate == 0) return false; - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - atomic_read(&device->rs_sect_ev); if (atomic_read(&device->ap_actlog_cnt) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index ba56f3f05312..02044ab7f767 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1678,7 +1678,8 @@ void drbd_rs_controller_reset(struct drbd_device *device) atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_ev, 0); device->rs_in_flight = 0; - device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors); + device->rs_last_events = + (int)part_stat_read_accum(disk->part0, sectors); /* Updating the RCU protected object in place is necessary since this function gets called from atomic context. diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 7df79ae6b0a1..dfe1dfc901cc 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -402,7 +402,6 @@ static struct floppy_drive_params drive_params[N_DRIVE]; static struct floppy_drive_struct drive_state[N_DRIVE]; static struct floppy_write_errors write_errors[N_DRIVE]; static struct timer_list motor_off_timer[N_DRIVE]; -static struct gendisk *disks[N_DRIVE]; static struct blk_mq_tag_set tag_sets[N_DRIVE]; static struct block_device *opened_bdev[N_DRIVE]; static DEFINE_MUTEX(open_lock); @@ -477,6 +476,8 @@ static struct floppy_struct floppy_type[32] = { { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */ }; +static struct gendisk *disks[N_DRIVE][ARRAY_SIZE(floppy_type)]; + #define SECTSIZE (_FD_SECTSIZE(*floppy)) /* Auto-detection: Disk type used until the next media change occurs. */ @@ -4111,7 +4112,7 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) new_dev = MINOR(bdev->bd_dev); drive_state[drive].fd_device = new_dev; - set_capacity(disks[drive], floppy_sizes[new_dev]); + set_capacity(disks[drive][ITYPE(new_dev)], floppy_sizes[new_dev]); if (old_dev != -1 && old_dev != new_dev) { if (buffer_drive == drive) buffer_track = -1; @@ -4579,15 +4580,58 @@ static bool floppy_available(int drive) return true; } -static struct kobject *floppy_find(dev_t dev, int *part, void *data) +static int floppy_alloc_disk(unsigned int drive, unsigned int type) { - int drive = (*part & 3) | ((*part & 0x80) >> 5); - if (drive >= N_DRIVE || !floppy_available(drive)) - return NULL; - if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type)) - return NULL; - *part = 0; - return get_disk_and_module(disks[drive]); + struct gendisk *disk; + int err; + + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; + + disk->queue = blk_mq_init_queue(&tag_sets[drive]); + if (IS_ERR(disk->queue)) { + err = PTR_ERR(disk->queue); + disk->queue = NULL; + put_disk(disk); + return err; + } + + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); + blk_queue_max_hw_sectors(disk->queue, 64); + disk->major = FLOPPY_MAJOR; + disk->first_minor = TOMINOR(drive) | (type << 2); + disk->fops = &floppy_fops; + disk->events = DISK_EVENT_MEDIA_CHANGE; + if (type) + sprintf(disk->disk_name, "fd%d_type%d", drive, type); + else + sprintf(disk->disk_name, "fd%d", drive); + /* to be cleaned up... */ + disk->private_data = (void *)(long)drive; + disk->flags |= GENHD_FL_REMOVABLE; + + disks[drive][type] = disk; + return 0; +} + +static DEFINE_MUTEX(floppy_probe_lock); + +static void floppy_probe(dev_t dev) +{ + unsigned int drive = (MINOR(dev) & 3) | ((MINOR(dev) & 0x80) >> 5); + unsigned int type = (MINOR(dev) >> 2) & 0x1f; + + if (drive >= N_DRIVE || !floppy_available(drive) || + type >= ARRAY_SIZE(floppy_type)) + return; + + mutex_lock(&floppy_probe_lock); + if (!disks[drive][type]) { + if (floppy_alloc_disk(drive, type) == 0) + add_disk(disks[drive][type]); + } + mutex_unlock(&floppy_probe_lock); } static int __init do_floppy_init(void) @@ -4609,33 +4653,25 @@ static int __init do_floppy_init(void) return -ENOMEM; for (drive = 0; drive < N_DRIVE; drive++) { - disks[drive] = alloc_disk(1); - if (!disks[drive]) { - err = -ENOMEM; + memset(&tag_sets[drive], 0, sizeof(tag_sets[drive])); + tag_sets[drive].ops = &floppy_mq_ops; + tag_sets[drive].nr_hw_queues = 1; + tag_sets[drive].nr_maps = 1; + tag_sets[drive].queue_depth = 2; + tag_sets[drive].numa_node = NUMA_NO_NODE; + tag_sets[drive].flags = BLK_MQ_F_SHOULD_MERGE; + err = blk_mq_alloc_tag_set(&tag_sets[drive]); + if (err) goto out_put_disk; - } - disks[drive]->queue = blk_mq_init_sq_queue(&tag_sets[drive], - &floppy_mq_ops, 2, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(disks[drive]->queue)) { - err = PTR_ERR(disks[drive]->queue); - disks[drive]->queue = NULL; + err = floppy_alloc_disk(drive, 0); + if (err) goto out_put_disk; - } - - blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH); - blk_queue_max_hw_sectors(disks[drive]->queue, 64); - disks[drive]->major = FLOPPY_MAJOR; - disks[drive]->first_minor = TOMINOR(drive); - disks[drive]->fops = &floppy_fops; - disks[drive]->events = DISK_EVENT_MEDIA_CHANGE; - sprintf(disks[drive]->disk_name, "fd%d", drive); timer_setup(&motor_off_timer[drive], motor_off_callback, 0); } - err = register_blkdev(FLOPPY_MAJOR, "fd"); + err = __register_blkdev(FLOPPY_MAJOR, "fd", floppy_probe); if (err) goto out_put_disk; @@ -4643,9 +4679,6 @@ static int __init do_floppy_init(void) if (err) goto out_unreg_blkdev; - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, NULL); - for (i = 0; i < 256; i++) if (ITYPE(i)) floppy_sizes[i] = floppy_type[ITYPE(i)].size; @@ -4673,7 +4706,7 @@ static int __init do_floppy_init(void) if (fdc_state[0].address == -1) { cancel_delayed_work(&fd_timeout); err = -ENODEV; - goto out_unreg_region; + goto out_unreg_driver; } #if N_FDC > 1 fdc_state[1].address = FDC2; @@ -4684,7 +4717,7 @@ static int __init do_floppy_init(void) if (err) { cancel_delayed_work(&fd_timeout); err = -EBUSY; - goto out_unreg_region; + goto out_unreg_driver; } /* initialise drive state */ @@ -4761,10 +4794,8 @@ static int __init do_floppy_init(void) if (err) goto out_remove_drives; - /* to be cleaned up... */ - disks[drive]->private_data = (void *)(long)drive; - disks[drive]->flags |= GENHD_FL_REMOVABLE; - device_add_disk(&floppy_device[drive].dev, disks[drive], NULL); + device_add_disk(&floppy_device[drive].dev, disks[drive][0], + NULL); } return 0; @@ -4772,30 +4803,27 @@ static int __init do_floppy_init(void) out_remove_drives: while (drive--) { if (floppy_available(drive)) { - del_gendisk(disks[drive]); + del_gendisk(disks[drive][0]); platform_device_unregister(&floppy_device[drive]); } } out_release_dma: if (atomic_read(&usage_count)) floppy_release_irq_and_dma(); -out_unreg_region: - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); +out_unreg_driver: platform_driver_unregister(&floppy_driver); out_unreg_blkdev: unregister_blkdev(FLOPPY_MAJOR, "fd"); out_put_disk: destroy_workqueue(floppy_wq); for (drive = 0; drive < N_DRIVE; drive++) { - if (!disks[drive]) + if (!disks[drive][0]) break; - if (disks[drive]->queue) { - del_timer_sync(&motor_off_timer[drive]); - blk_cleanup_queue(disks[drive]->queue); - disks[drive]->queue = NULL; - blk_mq_free_tag_set(&tag_sets[drive]); - } - put_disk(disks[drive]); + del_timer_sync(&motor_off_timer[drive]); + blk_cleanup_queue(disks[drive][0]->queue); + disks[drive][0]->queue = NULL; + blk_mq_free_tag_set(&tag_sets[drive]); + put_disk(disks[drive][0]); } return err; } @@ -5006,9 +5034,8 @@ module_init(floppy_module_init); static void __exit floppy_module_exit(void) { - int drive; + int drive, i; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); unregister_blkdev(FLOPPY_MAJOR, "fd"); platform_driver_unregister(&floppy_driver); @@ -5018,10 +5045,16 @@ static void __exit floppy_module_exit(void) del_timer_sync(&motor_off_timer[drive]); if (floppy_available(drive)) { - del_gendisk(disks[drive]); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + del_gendisk(disks[drive][i]); + } platform_device_unregister(&floppy_device[drive]); } - blk_cleanup_queue(disks[drive]->queue); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + blk_cleanup_queue(disks[drive][i]->queue); + } blk_mq_free_tag_set(&tag_sets[drive]); /* @@ -5029,10 +5062,17 @@ static void __exit floppy_module_exit(void) * queue reference in put_disk(). */ if (!(allowed_drive_mask & (1 << drive)) || - fdc_state[FDC(drive)].version == FDC_NONE) - disks[drive]->queue = NULL; + fdc_state[FDC(drive)].version == FDC_NONE) { + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + disks[drive][i]->queue = NULL; + } + } - put_disk(disks[drive]); + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { + if (disks[drive][i]) + put_disk(disks[drive][i]); + } } cancel_delayed_work_sync(&fd_timeout); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a58084c2ed7c..e5ff328f0917 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -251,12 +251,8 @@ loop_validate_block_size(unsigned short bsize) */ static void loop_set_size(struct loop_device *lo, loff_t size) { - struct block_device *bdev = lo->lo_device; - - bd_set_nr_sectors(bdev, size); - - if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false)) - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); + if (!set_capacity_and_notify(lo->lo_disk, size)) + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } static inline int @@ -679,10 +675,10 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; - if (f->f_mapping->host->i_bdev == bdev) + if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; - l = f->f_mapping->host->i_bdev->bd_disk->private_data; + l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; if (l->lo_state != Lo_bound) { return -EINVAL; } @@ -889,9 +885,7 @@ static void loop_config_discard(struct loop_device *lo) * file-backed loop devices: discarded regions read back as zero. */ if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { - struct request_queue *backingq; - - backingq = bdev_get_queue(inode->i_bdev); + struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); max_discard_sectors = backingq->limits.max_write_zeroes_sectors; granularity = backingq->limits.discard_granularity ?: @@ -1075,7 +1069,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct file *file; struct inode *inode; struct address_space *mapping; - struct block_device *claimed_bdev = NULL; int error; loff_t size; bool partscan; @@ -1094,8 +1087,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev->bd_contains; - error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure); + error = bd_prepare_to_claim(bdev, loop_configure); if (error) goto out_putf; } @@ -1138,7 +1130,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, if (error) goto out_unlock; - set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); + set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; @@ -1168,9 +1160,6 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); - set_blocksize(bdev, S_ISBLK(inode->i_mode) ? - block_size(inode->i_bdev) : PAGE_SIZE); - lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1185,15 +1174,15 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, mutex_unlock(&loop_ctl_mutex); if (partscan) loop_reread_partitions(lo, bdev); - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_configure); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); out: @@ -1252,7 +1241,6 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) set_capacity(lo->lo_disk, 0); loop_sysfs_exit(lo); if (bdev) { - bd_set_nr_sectors(bdev, 0); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } @@ -2235,24 +2223,18 @@ static int loop_lookup(struct loop_device **l, int i) return ret; } -static struct kobject *loop_probe(dev_t dev, int *part, void *data) +static void loop_probe(dev_t dev) { + int idx = MINOR(dev) >> part_shift; struct loop_device *lo; - struct kobject *kobj; - int err; + + if (max_loop && idx >= max_loop) + return; mutex_lock(&loop_ctl_mutex); - err = loop_lookup(&lo, MINOR(dev) >> part_shift); - if (err < 0) - err = loop_add(&lo, MINOR(dev) >> part_shift); - if (err < 0) - kobj = NULL; - else - kobj = get_disk_and_module(lo->lo_disk); + if (loop_lookup(&lo, idx) < 0) + loop_add(&lo, idx); mutex_unlock(&loop_ctl_mutex); - - *part = 0; - return kobj; } static long loop_control_ioctl(struct file *file, unsigned int cmd, @@ -2322,7 +2304,6 @@ MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { int i, nr; - unsigned long range; struct loop_device *lo; int err; @@ -2359,27 +2340,21 @@ static int __init loop_init(void) * /dev/loop-control interface, or be instantiated by accessing * a 'dead' device node. */ - if (max_loop) { + if (max_loop) nr = max_loop; - range = max_loop << part_shift; - } else { + else nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; - range = 1UL << MINORBITS; - } err = misc_register(&loop_misc); if (err < 0) goto err_out; - if (register_blkdev(LOOP_MAJOR, "loop")) { + if (__register_blkdev(LOOP_MAJOR, "loop", loop_probe)) { err = -EIO; goto misc_out; } - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, - THIS_MODULE, loop_probe, NULL, NULL); - /* pre-create number of devices given by config or max_loop */ mutex_lock(&loop_ctl_mutex); for (i = 0; i < nr; i++) @@ -2405,16 +2380,11 @@ static int loop_exit_cb(int id, void *ptr, void *data) static void __exit loop_exit(void) { - unsigned long range; - - range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; - mutex_lock(&loop_ctl_mutex); idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); idr_destroy(&loop_index_idr); - blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 153e2cdecb4d..53ac59d19ae5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3687,7 +3687,6 @@ static int mtip_block_initialize(struct driver_data *dd) /* Enable the block device and add it to /dev */ device_add_disk(&dd->pdev->dev, dd->disk, NULL); - dd->bdev = bdget_disk(dd->disk, 0); /* * Now that the disk is active, initialize any sysfs attributes * managed by the protocol layer. @@ -3721,9 +3720,6 @@ static int mtip_block_initialize(struct driver_data *dd) return rv; kthread_run_error: - bdput(dd->bdev); - dd->bdev = NULL; - /* Delete our gendisk. This also removes the device from /dev */ del_gendisk(dd->disk); @@ -3804,14 +3800,6 @@ static int mtip_block_remove(struct driver_data *dd) blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); blk_mq_unquiesce_queue(dd->queue); - /* - * Delete our gendisk structure. This also removes the device - * from /dev - */ - if (dd->bdev) { - bdput(dd->bdev); - dd->bdev = NULL; - } if (dd->disk) { if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) del_gendisk(dd->disk); @@ -4206,9 +4194,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) } while (atomic_read(&dd->irq_workers_active) != 0 && time_before(jiffies, to)); - if (!dd->sr) - fsync_bdev(dd->bdev); - if (atomic_read(&dd->irq_workers_active) != 0) { dev_warn(&dd->pdev->dev, "Completion workers still active!\n"); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index e22a7f0523bf..88f4206310e4 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -463,8 +463,6 @@ struct driver_data { int isr_binding; - struct block_device *bdev; - struct list_head online_list; /* linkage for online list */ struct list_head remove_list; /* linkage for removing list */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index aaae9220f3a0..92f84ed0ba9e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -296,40 +296,32 @@ static void nbd_size_clear(struct nbd_device *nbd) } } -static void nbd_size_update(struct nbd_device *nbd, bool start) +static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, + loff_t blksize) { - struct nbd_config *config = nbd->config; - struct block_device *bdev = bdget_disk(nbd->disk, 0); - sector_t nr_sectors = config->bytesize >> 9; + if (!blksize) + blksize = NBD_DEF_BLKSIZE; + if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize)) + return -EINVAL; - if (config->flags & NBD_FLAG_SEND_TRIM) { - nbd->disk->queue->limits.discard_granularity = config->blksize; - nbd->disk->queue->limits.discard_alignment = config->blksize; + nbd->config->bytesize = bytesize; + nbd->config->blksize = blksize; + + if (!nbd->task_recv) + return 0; + + if (nbd->config->flags & NBD_FLAG_SEND_TRIM) { + nbd->disk->queue->limits.discard_granularity = blksize; + nbd->disk->queue->limits.discard_alignment = blksize; blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); } - blk_queue_logical_block_size(nbd->disk->queue, config->blksize); - blk_queue_physical_block_size(nbd->disk->queue, config->blksize); - set_capacity(nbd->disk, nr_sectors); - if (bdev) { - if (bdev->bd_disk) { - bd_set_nr_sectors(bdev, nr_sectors); - if (start) - set_blocksize(bdev, config->blksize); - } else - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); - bdput(bdev); - } - kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); -} + blk_queue_logical_block_size(nbd->disk->queue, blksize); + blk_queue_physical_block_size(nbd->disk->queue, blksize); -static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, - loff_t nr_blocks) -{ - struct nbd_config *config = nbd->config; - config->blksize = blocksize; - config->bytesize = blocksize * nr_blocks; - if (nbd->task_recv != NULL) - nbd_size_update(nbd, false); + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); + if (!set_capacity_and_notify(nbd->disk, bytesize >> 9)) + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); + return 0; } static void nbd_complete_rq(struct request *req) @@ -1140,7 +1132,7 @@ static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) return; - bd_set_nr_sectors(bdev, 0); + set_capacity(bdev->bd_disk, 0); } static void nbd_parse_flags(struct nbd_device *nbd) @@ -1309,8 +1301,7 @@ static int nbd_start_device(struct nbd_device *nbd) args->index = i; queue_work(nbd->recv_workq, &args->work); } - nbd_size_update(nbd, true); - return error; + return nbd_set_size(nbd, config->bytesize, config->blksize); } static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) @@ -1352,14 +1343,6 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd, nbd_config_put(nbd); } -static bool nbd_is_valid_blksize(unsigned long blksize) -{ - if (!blksize || !is_power_of_2(blksize) || blksize < 512 || - blksize > PAGE_SIZE) - return false; - return true; -} - static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout) { nbd->tag_set.timeout = timeout * HZ; @@ -1384,20 +1367,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SOCK: return nbd_add_socket(nbd, arg, false); case NBD_SET_BLKSIZE: - if (!arg) - arg = NBD_DEF_BLKSIZE; - if (!nbd_is_valid_blksize(arg)) - return -EINVAL; - nbd_size_set(nbd, arg, - div_s64(config->bytesize, arg)); - return 0; + return nbd_set_size(nbd, config->bytesize, arg); case NBD_SET_SIZE: - nbd_size_set(nbd, config->blksize, - div_s64(arg, config->blksize)); - return 0; + return nbd_set_size(nbd, arg, config->blksize); case NBD_SET_SIZE_BLOCKS: - nbd_size_set(nbd, config->blksize, arg); - return 0; + return nbd_set_size(nbd, arg * config->blksize, + config->blksize); case NBD_SET_TIMEOUT: nbd_set_cmd_timeout(nbd, arg); return 0; @@ -1513,12 +1488,10 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) static void nbd_release(struct gendisk *disk, fmode_t mode) { struct nbd_device *nbd = disk->private_data; - struct block_device *bdev = bdget_disk(disk, 0); if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && - bdev->bd_openers == 0) + disk->part0->bd_openers == 0) nbd_disconnect_and_put(nbd); - bdput(bdev); nbd_config_put(nbd); nbd_put(nbd); @@ -1815,18 +1788,11 @@ static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd) if (info->attrs[NBD_ATTR_SIZE_BYTES]) bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); - if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); - if (!bsize) - bsize = NBD_DEF_BLKSIZE; - if (!nbd_is_valid_blksize(bsize)) { - printk(KERN_ERR "Invalid block size %llu\n", bsize); - return -EINVAL; - } - } if (bytes != config->bytesize || bsize != config->blksize) - nbd_size_set(nbd, bsize, div64_u64(bytes, bsize)); + return nbd_set_size(nbd, bytes, bsize); return 0; } diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig new file mode 100644 index 000000000000..6bf1f8ca20a2 --- /dev/null +++ b/drivers/block/null_blk/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Null block device driver configuration +# + +config BLK_DEV_NULL_BLK + tristate "Null test block driver" + select CONFIGFS_FS + +config BLK_DEV_NULL_BLK_FAULT_INJECTION + bool "Support fault injection for Null test block driver" + depends on BLK_DEV_NULL_BLK && FAULT_INJECTION diff --git a/drivers/block/null_blk/Makefile b/drivers/block/null_blk/Makefile new file mode 100644 index 000000000000..84c36e512ab8 --- /dev/null +++ b/drivers/block/null_blk/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +# needed for trace events +ccflags-y += -I$(src) + +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o +null_blk-objs := main.o +ifeq ($(CONFIG_BLK_DEV_ZONED), y) +null_blk-$(CONFIG_TRACING) += trace.o +endif +null_blk-$(CONFIG_BLK_DEV_ZONED) += zoned.o diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk/main.c similarity index 97% rename from drivers/block/null_blk_main.c rename to drivers/block/null_blk/main.c index 4685ea401d5b..5357c3a4a36f 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk/main.c @@ -152,6 +152,10 @@ static int g_bs = 512; module_param_named(bs, g_bs, int, 0444); MODULE_PARM_DESC(bs, "Block size (in bytes)"); +static int g_max_sectors; +module_param_named(max_sectors, g_max_sectors, int, 0444); +MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)"); + static unsigned int nr_devices = 1; module_param(nr_devices, uint, 0444); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); @@ -346,6 +350,7 @@ NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL); NULLB_DEVICE_ATTR(blocksize, uint, NULL); +NULLB_DEVICE_ATTR(max_sectors, uint, NULL); NULLB_DEVICE_ATTR(irqmode, uint, NULL); NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); NULLB_DEVICE_ATTR(index, uint, NULL); @@ -463,6 +468,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_home_node, &nullb_device_attr_queue_mode, &nullb_device_attr_blocksize, + &nullb_device_attr_max_sectors, &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, &nullb_device_attr_index, @@ -533,7 +539,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n"); + "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -588,6 +594,7 @@ static struct nullb_device *null_alloc_dev(void) dev->home_node = g_home_node; dev->queue_mode = g_queue_mode; dev->blocksize = g_bs; + dev->max_sectors = g_max_sectors; dev->irqmode = g_irqmode; dev->hw_queue_depth = g_hw_queue_depth; dev->blocking = g_blocking; @@ -1076,13 +1083,16 @@ static void nullb_fill_pattern(struct nullb *nullb, struct page *page, kunmap_atomic(dst); } -static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) +blk_status_t null_handle_discard(struct nullb_device *dev, + sector_t sector, sector_t nr_sectors) { + struct nullb *nullb = dev->nullb; + size_t n = nr_sectors << SECTOR_SHIFT; size_t temp; spin_lock_irq(&nullb->lock); while (n > 0) { - temp = min_t(size_t, n, nullb->dev->blocksize); + temp = min_t(size_t, n, dev->blocksize); null_free_sector(nullb, sector, false); if (null_cache_active(nullb)) null_free_sector(nullb, sector, true); @@ -1090,6 +1100,8 @@ static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) n -= temp; } spin_unlock_irq(&nullb->lock); + + return BLK_STS_OK; } static int null_handle_flush(struct nullb *nullb) @@ -1149,17 +1161,10 @@ static int null_handle_rq(struct nullb_cmd *cmd) struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; - sector_t sector; + sector_t sector = blk_rq_pos(rq); struct req_iterator iter; struct bio_vec bvec; - sector = blk_rq_pos(rq); - - if (req_op(rq) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, blk_rq_bytes(rq)); - return 0; - } - spin_lock_irq(&nullb->lock); rq_for_each_segment(bvec, rq, iter) { len = bvec.bv_len; @@ -1183,18 +1188,10 @@ static int null_handle_bio(struct nullb_cmd *cmd) struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; - sector_t sector; + sector_t sector = bio->bi_iter.bi_sector; struct bio_vec bvec; struct bvec_iter iter; - sector = bio->bi_iter.bi_sector; - - if (bio_op(bio) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, - bio_sectors(bio) << SECTOR_SHIFT); - return 0; - } - spin_lock_irq(&nullb->lock); bio_for_each_segment(bvec, bio, iter) { len = bvec.bv_len; @@ -1263,11 +1260,16 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, } static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, - enum req_opf op) + enum req_opf op, + sector_t sector, + sector_t nr_sectors) { struct nullb_device *dev = cmd->nq->dev; int err; + if (op == REQ_OP_DISCARD) + return null_handle_discard(dev, sector, nr_sectors); + if (dev->queue_mode == NULL_Q_BIO) err = null_handle_bio(cmd); else @@ -1343,7 +1345,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, } if (dev->memory_backed) - return null_handle_memory_backed(cmd, op); + return null_handle_memory_backed(cmd, op, sector, nr_sectors); return BLK_STS_OK; } @@ -1589,6 +1591,12 @@ static void null_config_discard(struct nullb *nullb) if (nullb->dev->discard == false) return; + if (!nullb->dev->memory_backed) { + nullb->dev->discard = false; + pr_info("discard option is ignored without memory backing\n"); + return; + } + if (nullb->dev->zoned) { nullb->dev->discard = false; pr_info("discard option is ignored in zoned mode\n"); @@ -1866,6 +1874,11 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_logical_block_size(nullb->q, dev->blocksize); blk_queue_physical_block_size(nullb->q, dev->blocksize); + if (!dev->max_sectors) + dev->max_sectors = queue_max_hw_sectors(nullb->q); + dev->max_sectors = min_t(unsigned int, dev->max_sectors, + BLK_DEF_MAX_SECTORS); + blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); null_config_discard(nullb); @@ -1909,6 +1922,12 @@ static int __init null_init(void) g_bs = PAGE_SIZE; } + if (g_max_sectors > BLK_DEF_MAX_SECTORS) { + pr_warn("invalid max sectors\n"); + pr_warn("defaults max sectors to %u\n", BLK_DEF_MAX_SECTORS); + g_max_sectors = BLK_DEF_MAX_SECTORS; + } + if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { pr_err("invalid home_node value\n"); g_home_node = NUMA_NO_NODE; diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk/null_blk.h similarity index 83% rename from drivers/block/null_blk.h rename to drivers/block/null_blk/null_blk.h index c24d9b5ad81a..83504f3cc9d6 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include struct nullb_cmd { struct request *rq; @@ -32,6 +34,26 @@ struct nullb_queue { struct nullb_cmd *cmds; }; +struct nullb_zone { + /* + * Zone lock to prevent concurrent modification of a zone write + * pointer position and condition: with memory backing, a write + * command execution may sleep on memory allocation. For this case, + * use mutex as the zone lock. Otherwise, use the spinlock for + * locking the zone. + */ + union { + spinlock_t spinlock; + struct mutex mutex; + }; + enum blk_zone_type type; + enum blk_zone_cond cond; + sector_t start; + sector_t wp; + unsigned int len; + unsigned int capacity; +}; + struct nullb_device { struct nullb *nullb; struct config_item item; @@ -45,10 +67,11 @@ struct nullb_device { unsigned int nr_zones_imp_open; unsigned int nr_zones_exp_open; unsigned int nr_zones_closed; - struct blk_zone *zones; + unsigned int imp_close_zone_no; + struct nullb_zone *zones; sector_t zone_size_sects; - spinlock_t zone_lock; - unsigned long *zone_locks; + bool need_zone_res_mgmt; + spinlock_t zone_res_lock; unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ @@ -62,6 +85,7 @@ struct nullb_device { unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ unsigned int blocksize; /* block size */ + unsigned int max_sectors; /* Max sectors per command */ unsigned int irqmode; /* IRQ completion handler */ unsigned int hw_queue_depth; /* queue depth */ unsigned int index; /* index of the disk, only valid with a disk */ @@ -93,6 +117,8 @@ struct nullb { char disk_name[DISK_NAME_LEN]; }; +blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector, + sector_t nr_sectors); blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, unsigned int nr_sectors); diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk/trace.c similarity index 93% rename from drivers/block/null_blk_trace.c rename to drivers/block/null_blk/trace.c index f246e7bff698..3711cba16071 100644 --- a/drivers/block/null_blk_trace.c +++ b/drivers/block/null_blk/trace.c @@ -4,7 +4,7 @@ * * Copyright (C) 2020 Western Digital Corporation or its affiliates. */ -#include "null_blk_trace.h" +#include "trace.h" /* * Helper to use for all null_blk traces to extract disk name. diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk/trace.h similarity index 97% rename from drivers/block/null_blk_trace.h rename to drivers/block/null_blk/trace.h index 4f83032eb544..ce3b430e88c5 100644 --- a/drivers/block/null_blk_trace.h +++ b/drivers/block/null_blk/trace.h @@ -73,7 +73,7 @@ TRACE_EVENT(nullb_report_zones, #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE null_blk_trace +#define TRACE_INCLUDE_FILE trace /* This part must be outside protection */ #include diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk/zoned.c similarity index 68% rename from drivers/block/null_blk_zoned.c rename to drivers/block/null_blk/zoned.c index beb34b4f76b0..148b871f263b 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -4,19 +4,58 @@ #include "null_blk.h" #define CREATE_TRACE_POINTS -#include "null_blk_trace.h" +#include "trace.h" -/* zone_size in MBs to sectors. */ -#define ZONE_SIZE_SHIFT 11 +#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) { return sect >> ilog2(dev->zone_size_sects); } +static inline void null_lock_zone_res(struct nullb_device *dev) +{ + if (dev->need_zone_res_mgmt) + spin_lock_irq(&dev->zone_res_lock); +} + +static inline void null_unlock_zone_res(struct nullb_device *dev) +{ + if (dev->need_zone_res_mgmt) + spin_unlock_irq(&dev->zone_res_lock); +} + +static inline void null_init_zone_lock(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_lock_init(&zone->spinlock); + else + mutex_init(&zone->mutex); +} + +static inline void null_lock_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_lock_irq(&zone->spinlock); + else + mutex_lock(&zone->mutex); +} + +static inline void null_unlock_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_unlock_irq(&zone->spinlock); + else + mutex_unlock(&zone->mutex); +} + int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) { - sector_t dev_size = (sector_t)dev->size * 1024 * 1024; + sector_t dev_capacity_sects, zone_capacity_sects; + struct nullb_zone *zone; sector_t sector = 0; unsigned int i; @@ -38,29 +77,19 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) return -EINVAL; } - dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT; - dev->nr_zones = dev_size >> - (SECTOR_SHIFT + ilog2(dev->zone_size_sects)); - dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct blk_zone), - GFP_KERNEL | __GFP_ZERO); + zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); + dev_capacity_sects = MB_TO_SECTS(dev->size); + dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); + dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); + if (dev_capacity_sects & (dev->zone_size_sects - 1)) + dev->nr_zones++; + + dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone), + GFP_KERNEL | __GFP_ZERO); if (!dev->zones) return -ENOMEM; - /* - * With memory backing, the zone_lock spinlock needs to be temporarily - * released to avoid scheduling in atomic context. To guarantee zone - * information protection, use a bitmap to lock zones with - * wait_on_bit_lock_io(). Sleeping on the lock is OK as memory backing - * implies that the queue is marked with BLK_MQ_F_BLOCKING. - */ - spin_lock_init(&dev->zone_lock); - if (dev->memory_backed) { - dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL); - if (!dev->zone_locks) { - kvfree(dev->zones); - return -ENOMEM; - } - } + spin_lock_init(&dev->zone_res_lock); if (dev->zone_nr_conv >= dev->nr_zones) { dev->zone_nr_conv = dev->nr_zones - 1; @@ -83,10 +112,13 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) dev->zone_max_open = 0; pr_info("zone_max_open limit disabled, limit >= zone count\n"); } + dev->need_zone_res_mgmt = dev->zone_max_active || dev->zone_max_open; + dev->imp_close_zone_no = dev->zone_nr_conv; for (i = 0; i < dev->zone_nr_conv; i++) { - struct blk_zone *zone = &dev->zones[i]; + zone = &dev->zones[i]; + null_init_zone_lock(dev, zone); zone->start = sector; zone->len = dev->zone_size_sects; zone->capacity = zone->len; @@ -98,11 +130,16 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) } for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { - struct blk_zone *zone = &dev->zones[i]; + zone = &dev->zones[i]; + null_init_zone_lock(dev, zone); zone->start = zone->wp = sector; - zone->len = dev->zone_size_sects; - zone->capacity = dev->zone_capacity << ZONE_SIZE_SHIFT; + if (zone->start + dev->zone_size_sects > dev_capacity_sects) + zone->len = dev_capacity_sects - zone->start; + else + zone->len = dev->zone_size_sects; + zone->capacity = + min_t(sector_t, zone->len, zone_capacity_sects); zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; zone->cond = BLK_ZONE_COND_EMPTY; @@ -140,32 +177,17 @@ int null_register_zoned_dev(struct nullb *nullb) void null_free_zoned_dev(struct nullb_device *dev) { - bitmap_free(dev->zone_locks); kvfree(dev->zones); } -static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno) -{ - if (dev->memory_backed) - wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE); - spin_lock_irq(&dev->zone_lock); -} - -static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno) -{ - spin_unlock_irq(&dev->zone_lock); - - if (dev->memory_backed) - clear_and_wake_up_bit(zno, dev->zone_locks); -} - int null_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct nullb *nullb = disk->private_data; struct nullb_device *dev = nullb->dev; - unsigned int first_zone, i, zno; - struct blk_zone zone; + unsigned int first_zone, i; + struct nullb_zone *zone; + struct blk_zone blkz; int error; first_zone = null_zone_no(dev, sector); @@ -175,19 +197,25 @@ int null_report_zones(struct gendisk *disk, sector_t sector, nr_zones = min(nr_zones, dev->nr_zones - first_zone); trace_nullb_report_zones(nullb, nr_zones); - zno = first_zone; - for (i = 0; i < nr_zones; i++, zno++) { + memset(&blkz, 0, sizeof(struct blk_zone)); + zone = &dev->zones[first_zone]; + for (i = 0; i < nr_zones; i++, zone++) { /* * Stacked DM target drivers will remap the zone information by * modifying the zone information passed to the report callback. * So use a local copy to avoid corruption of the device zone * array. */ - null_lock_zone(dev, zno); - memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone)); - null_unlock_zone(dev, zno); + null_lock_zone(dev, zone); + blkz.start = zone->start; + blkz.len = zone->len; + blkz.wp = zone->wp; + blkz.type = zone->type; + blkz.cond = zone->cond; + blkz.capacity = zone->capacity; + null_unlock_zone(dev, zone); - error = cb(&zone, i, data); + error = cb(&blkz, i, data); if (error) return error; } @@ -203,7 +231,7 @@ size_t null_zone_valid_read_len(struct nullb *nullb, sector_t sector, unsigned int len) { struct nullb_device *dev = nullb->dev; - struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; + struct nullb_zone *zone = &dev->zones[null_zone_no(dev, sector)]; unsigned int nr_sectors = len >> SECTOR_SHIFT; /* Read must be below the write pointer position */ @@ -217,11 +245,9 @@ size_t null_zone_valid_read_len(struct nullb *nullb, return (zone->wp - sector) << SECTOR_SHIFT; } -static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t __null_close_zone(struct nullb_device *dev, + struct nullb_zone *zone) { - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) - return BLK_STS_IOERR; - switch (zone->cond) { case BLK_ZONE_COND_CLOSED: /* close operation on closed is not an error */ @@ -248,13 +274,24 @@ static blk_status_t null_close_zone(struct nullb_device *dev, struct blk_zone *z return BLK_STS_OK; } -static void null_close_first_imp_zone(struct nullb_device *dev) +static void null_close_imp_open_zone(struct nullb_device *dev) { - unsigned int i; + struct nullb_zone *zone; + unsigned int zno, i; + + zno = dev->imp_close_zone_no; + if (zno >= dev->nr_zones) + zno = dev->zone_nr_conv; for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { - if (dev->zones[i].cond == BLK_ZONE_COND_IMP_OPEN) { - null_close_zone(dev, &dev->zones[i]); + zone = &dev->zones[zno]; + zno++; + if (zno >= dev->nr_zones) + zno = dev->zone_nr_conv; + + if (zone->cond == BLK_ZONE_COND_IMP_OPEN) { + __null_close_zone(dev, zone); + dev->imp_close_zone_no = zno; return; } } @@ -282,7 +319,7 @@ static blk_status_t null_check_open(struct nullb_device *dev) if (dev->nr_zones_imp_open) { if (null_check_active(dev) == BLK_STS_OK) { - null_close_first_imp_zone(dev); + null_close_imp_open_zone(dev); return BLK_STS_OK; } } @@ -303,7 +340,8 @@ static blk_status_t null_check_open(struct nullb_device *dev) * it is not certain that closing an implicit open zone will allow a new zone * to be opened, since we might already be at the active limit capacity. */ -static blk_status_t null_check_zone_resources(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_check_zone_resources(struct nullb_device *dev, + struct nullb_zone *zone) { blk_status_t ret; @@ -327,34 +365,23 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, { struct nullb_device *dev = cmd->nq->dev; unsigned int zno = null_zone_no(dev, sector); - struct blk_zone *zone = &dev->zones[zno]; + struct nullb_zone *zone = &dev->zones[zno]; blk_status_t ret; trace_nullb_zone_op(cmd, zno, zone->cond); - if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) { + if (append) + return BLK_STS_IOERR; return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); + } - null_lock_zone(dev, zno); + null_lock_zone(dev, zone); - switch (zone->cond) { - case BLK_ZONE_COND_FULL: + if (zone->cond == BLK_ZONE_COND_FULL) { /* Cannot write to a full zone */ ret = BLK_STS_IOERR; goto unlock; - case BLK_ZONE_COND_EMPTY: - case BLK_ZONE_COND_CLOSED: - ret = null_check_zone_resources(dev, zone); - if (ret != BLK_STS_OK) - goto unlock; - break; - case BLK_ZONE_COND_IMP_OPEN: - case BLK_ZONE_COND_EXP_OPEN: - break; - default: - /* Invalid zone condition */ - ret = BLK_STS_IOERR; - goto unlock; } /* @@ -379,60 +406,69 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, goto unlock; } - if (zone->cond == BLK_ZONE_COND_CLOSED) { - dev->nr_zones_closed--; - dev->nr_zones_imp_open++; - } else if (zone->cond == BLK_ZONE_COND_EMPTY) { - dev->nr_zones_imp_open++; + if (zone->cond == BLK_ZONE_COND_CLOSED || + zone->cond == BLK_ZONE_COND_EMPTY) { + null_lock_zone_res(dev); + + ret = null_check_zone_resources(dev, zone); + if (ret != BLK_STS_OK) { + null_unlock_zone_res(dev); + goto unlock; + } + if (zone->cond == BLK_ZONE_COND_CLOSED) { + dev->nr_zones_closed--; + dev->nr_zones_imp_open++; + } else if (zone->cond == BLK_ZONE_COND_EMPTY) { + dev->nr_zones_imp_open++; + } + + if (zone->cond != BLK_ZONE_COND_EXP_OPEN) + zone->cond = BLK_ZONE_COND_IMP_OPEN; + + null_unlock_zone_res(dev); } - if (zone->cond != BLK_ZONE_COND_EXP_OPEN) - zone->cond = BLK_ZONE_COND_IMP_OPEN; - /* - * Memory backing allocation may sleep: release the zone_lock spinlock - * to avoid scheduling in atomic context. Zone operation atomicity is - * still guaranteed through the zone_locks bitmap. - */ - if (dev->memory_backed) - spin_unlock_irq(&dev->zone_lock); ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors); - if (dev->memory_backed) - spin_lock_irq(&dev->zone_lock); - if (ret != BLK_STS_OK) goto unlock; zone->wp += nr_sectors; if (zone->wp == zone->start + zone->capacity) { + null_lock_zone_res(dev); if (zone->cond == BLK_ZONE_COND_EXP_OPEN) dev->nr_zones_exp_open--; else if (zone->cond == BLK_ZONE_COND_IMP_OPEN) dev->nr_zones_imp_open--; zone->cond = BLK_ZONE_COND_FULL; + null_unlock_zone_res(dev); } + ret = BLK_STS_OK; unlock: - null_unlock_zone(dev, zno); + null_unlock_zone(dev, zone); return ret; } -static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_open_zone(struct nullb_device *dev, + struct nullb_zone *zone) { - blk_status_t ret; + blk_status_t ret = BLK_STS_OK; if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; + null_lock_zone_res(dev); + switch (zone->cond) { case BLK_ZONE_COND_EXP_OPEN: /* open operation on exp open is not an error */ - return BLK_STS_OK; + goto unlock; case BLK_ZONE_COND_EMPTY: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; break; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; @@ -440,35 +476,57 @@ static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zo case BLK_ZONE_COND_CLOSED: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; dev->nr_zones_closed--; break; case BLK_ZONE_COND_FULL: default: - return BLK_STS_IOERR; + ret = BLK_STS_IOERR; + goto unlock; } zone->cond = BLK_ZONE_COND_EXP_OPEN; dev->nr_zones_exp_open++; - return BLK_STS_OK; +unlock: + null_unlock_zone_res(dev); + + return ret; } -static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_close_zone(struct nullb_device *dev, + struct nullb_zone *zone) { blk_status_t ret; if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; + null_lock_zone_res(dev); + ret = __null_close_zone(dev, zone); + null_unlock_zone_res(dev); + + return ret; +} + +static blk_status_t null_finish_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + blk_status_t ret = BLK_STS_OK; + + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + + null_lock_zone_res(dev); + switch (zone->cond) { case BLK_ZONE_COND_FULL: /* finish operation on full is not an error */ - return BLK_STS_OK; + goto unlock; case BLK_ZONE_COND_EMPTY: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; break; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; @@ -479,27 +537,35 @@ static blk_status_t null_finish_zone(struct nullb_device *dev, struct blk_zone * case BLK_ZONE_COND_CLOSED: ret = null_check_zone_resources(dev, zone); if (ret != BLK_STS_OK) - return ret; + goto unlock; dev->nr_zones_closed--; break; default: - return BLK_STS_IOERR; + ret = BLK_STS_IOERR; + goto unlock; } zone->cond = BLK_ZONE_COND_FULL; zone->wp = zone->start + zone->len; - return BLK_STS_OK; +unlock: + null_unlock_zone_res(dev); + + return ret; } -static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *zone) +static blk_status_t null_reset_zone(struct nullb_device *dev, + struct nullb_zone *zone) { if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) return BLK_STS_IOERR; + null_lock_zone_res(dev); + switch (zone->cond) { case BLK_ZONE_COND_EMPTY: /* reset operation on empty is not an error */ + null_unlock_zone_res(dev); return BLK_STS_OK; case BLK_ZONE_COND_IMP_OPEN: dev->nr_zones_imp_open--; @@ -513,12 +579,18 @@ static blk_status_t null_reset_zone(struct nullb_device *dev, struct blk_zone *z case BLK_ZONE_COND_FULL: break; default: + null_unlock_zone_res(dev); return BLK_STS_IOERR; } zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; + null_unlock_zone_res(dev); + + if (dev->memory_backed) + return null_handle_discard(dev, zone->start, zone->len); + return BLK_STS_OK; } @@ -527,19 +599,19 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, { struct nullb_device *dev = cmd->nq->dev; unsigned int zone_no; - struct blk_zone *zone; + struct nullb_zone *zone; blk_status_t ret; size_t i; if (op == REQ_OP_ZONE_RESET_ALL) { for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) { - null_lock_zone(dev, i); zone = &dev->zones[i]; + null_lock_zone(dev, zone); if (zone->cond != BLK_ZONE_COND_EMPTY) { null_reset_zone(dev, zone); trace_nullb_zone_op(cmd, i, zone->cond); } - null_unlock_zone(dev, i); + null_unlock_zone(dev, zone); } return BLK_STS_OK; } @@ -547,7 +619,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, zone_no = null_zone_no(dev, sector); zone = &dev->zones[zone_no]; - null_lock_zone(dev, zone_no); + null_lock_zone(dev, zone); switch (op) { case REQ_OP_ZONE_RESET: @@ -570,7 +642,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, if (ret == BLK_STS_OK) trace_nullb_zone_op(cmd, zone_no, zone->cond); - null_unlock_zone(dev, zone_no); + null_unlock_zone(dev, zone); return ret; } @@ -578,29 +650,28 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, sector_t nr_sectors) { - struct nullb_device *dev = cmd->nq->dev; - unsigned int zno = null_zone_no(dev, sector); + struct nullb_device *dev; + struct nullb_zone *zone; blk_status_t sts; switch (op) { case REQ_OP_WRITE: - sts = null_zone_write(cmd, sector, nr_sectors, false); - break; + return null_zone_write(cmd, sector, nr_sectors, false); case REQ_OP_ZONE_APPEND: - sts = null_zone_write(cmd, sector, nr_sectors, true); - break; + return null_zone_write(cmd, sector, nr_sectors, true); case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: - sts = null_zone_mgmt(cmd, op, sector); - break; + return null_zone_mgmt(cmd, op, sector); default: - null_lock_zone(dev, zno); - sts = null_process_cmd(cmd, op, sector, nr_sectors); - null_unlock_zone(dev, zno); - } + dev = cmd->nq->dev; + zone = &dev->zones[null_zone_no(dev, sector)]; - return sts; + null_lock_zone(dev, zone); + sts = null_process_cmd(cmd, op, sector, nr_sectors); + null_unlock_zone(dev, zone); + return sts; + } } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 467dbd06b7cd..b8bb8ec7538d 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2130,8 +2130,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) } set_capacity(pd->disk, lba << 2); - set_capacity(pd->bdev->bd_disk, lba << 2); - bd_set_nr_sectors(pd->bdev, lba << 2); + set_capacity_and_notify(pd->bdev->bd_disk, lba << 2); q = bdev_get_queue(pd->bdev); if (write) { @@ -2584,9 +2583,11 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, case CDROM_LAST_WRITTEN: case CDROM_SEND_PACKET: case SCSI_IOCTL_SEND_COMMAND: - ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg); + if (!bdev->bd_disk->fops->ioctl) + ret = -ENOTTY; + else + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); break; - default: pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); ret = -ENOTTY; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index f84128abade3..2ed79b09439a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -692,12 +692,9 @@ static void rbd_release(struct gendisk *disk, fmode_t mode) put_device(&rbd_dev->dev); } -static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) +static int rbd_set_read_only(struct block_device *bdev, bool ro) { - int ro; - - if (get_user(ro, (int __user *)arg)) - return -EFAULT; + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; /* * Both images mapped read-only and snapshots can't be marked @@ -710,43 +707,14 @@ static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) rbd_assert(!rbd_is_snap(rbd_dev)); } - /* Let blkdev_roset() handle it */ - return -ENOTTY; + return 0; } -static int rbd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct rbd_device *rbd_dev = bdev->bd_disk->private_data; - int ret; - - switch (cmd) { - case BLKROSET: - ret = rbd_ioctl_set_ro(rbd_dev, arg); - break; - default: - ret = -ENOTTY; - } - - return ret; -} - -#ifdef CONFIG_COMPAT -static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - return rbd_ioctl(bdev, mode, cmd, arg); -} -#endif /* CONFIG_COMPAT */ - static const struct block_device_operations rbd_bd_ops = { .owner = THIS_MODULE, .open = rbd_open, .release = rbd_release, - .ioctl = rbd_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = rbd_compat_ioctl, -#endif + .set_read_only = rbd_set_read_only, }; /* @@ -4920,8 +4888,7 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev) !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; dout("setting size to %llu sectors", (unsigned long long)size); - set_capacity(rbd_dev->disk, size); - revalidate_disk_size(rbd_dev->disk, true); + set_capacity_and_notify(rbd_dev->disk, size); } } diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c index 4f4474eecadb..a7caeedeb198 100644 --- a/drivers/block/rnbd/rnbd-clt-sysfs.c +++ b/drivers/block/rnbd/rnbd-clt-sysfs.c @@ -37,7 +37,6 @@ enum { }; static const unsigned int rnbd_opt_mandatory[] = { - RNBD_OPT_PATH, RNBD_OPT_DEV_PATH, RNBD_OPT_SESSNAME, }; @@ -435,6 +434,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev) */ if (strlen(dev->blk_symlink_name) && try_module_get(THIS_MODULE)) { sysfs_remove_link(rnbd_devs_kobj, dev->blk_symlink_name); + kfree(dev->blk_symlink_name); module_put(THIS_MODULE); } } @@ -451,9 +451,11 @@ static int rnbd_clt_add_dev_kobj(struct rnbd_clt_dev *dev) ret = kobject_init_and_add(&dev->kobj, &rnbd_dev_ktype, gd_kobj, "%s", "rnbd"); - if (ret) + if (ret) { rnbd_clt_err(dev, "Failed to create device sysfs dir, err: %d\n", ret); + kobject_put(&dev->kobj); + } return ret; } @@ -481,16 +483,27 @@ static int rnbd_clt_get_path_name(struct rnbd_clt_dev *dev, char *buf, if (ret >= len) return -ENAMETOOLONG; + ret = snprintf(buf, len, "%s@%s", buf, dev->sess->sessname); + if (ret >= len) + return -ENAMETOOLONG; + return 0; } static int rnbd_clt_add_dev_symlink(struct rnbd_clt_dev *dev) { struct kobject *gd_kobj = &disk_to_dev(dev->gd)->kobj; - int ret; + int ret, len; + + len = strlen(dev->pathname) + strlen(dev->sess->sessname) + 2; + dev->blk_symlink_name = kzalloc(len, GFP_KERNEL); + if (!dev->blk_symlink_name) { + rnbd_clt_err(dev, "Failed to allocate memory for blk_symlink_name\n"); + return -ENOMEM; + } ret = rnbd_clt_get_path_name(dev, dev->blk_symlink_name, - sizeof(dev->blk_symlink_name)); + len); if (ret) { rnbd_clt_err(dev, "Failed to get /sys/block symlink path, err: %d\n", ret); diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 8b2411ccbda9..a199b190c73d 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -59,6 +59,7 @@ static void rnbd_clt_put_dev(struct rnbd_clt_dev *dev) ida_simple_remove(&index_ida, dev->clt_device_id); mutex_unlock(&ida_lock); kfree(dev->hw_queues); + kfree(dev->pathname); rnbd_clt_put_sess(dev->sess); mutex_destroy(&dev->lock); kfree(dev); @@ -100,8 +101,7 @@ static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev, rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", dev->nsectors, new_nsectors); dev->nsectors = new_nsectors; - set_capacity(dev->gd, dev->nsectors); - revalidate_disk_size(dev->gd, true); + set_capacity_and_notify(dev->gd, dev->nsectors); return 0; } @@ -1193,6 +1193,12 @@ find_and_get_or_create_sess(const char *sessname, else if (!first) return sess; + if (!path_cnt) { + pr_err("Session %s not found, and path parameter not given", sessname); + err = -ENXIO; + goto put_sess; + } + rtrs_ops = (struct rtrs_clt_ops) { .priv = sess, .link_ev = rnbd_clt_link_ev, @@ -1381,10 +1387,17 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, pathname, sess->sessname, ret); goto out_queues; } + + dev->pathname = kzalloc(strlen(pathname) + 1, GFP_KERNEL); + if (!dev->pathname) { + ret = -ENOMEM; + goto out_queues; + } + strlcpy(dev->pathname, pathname, strlen(pathname) + 1); + dev->clt_device_id = ret; dev->sess = sess; dev->access_mode = access_mode; - strlcpy(dev->pathname, pathname, sizeof(dev->pathname)); mutex_init(&dev->lock); refcount_set(&dev->refcount, 1); dev->dev_state = DEV_STATE_INIT; @@ -1404,17 +1417,20 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, return ERR_PTR(ret); } -static bool __exists_dev(const char *pathname) +static bool __exists_dev(const char *pathname, const char *sessname) { struct rnbd_clt_session *sess; struct rnbd_clt_dev *dev; bool found = false; list_for_each_entry(sess, &sess_list, list) { + if (sessname && strncmp(sess->sessname, sessname, + sizeof(sess->sessname))) + continue; mutex_lock(&sess->lock); list_for_each_entry(dev, &sess->devs_list, list) { - if (!strncmp(dev->pathname, pathname, - sizeof(dev->pathname))) { + if (strlen(dev->pathname) == strlen(pathname) && + !strcmp(dev->pathname, pathname)) { found = true; break; } @@ -1427,12 +1443,12 @@ static bool __exists_dev(const char *pathname) return found; } -static bool exists_devpath(const char *pathname) +static bool exists_devpath(const char *pathname, const char *sessname) { bool found; mutex_lock(&sess_lock); - found = __exists_dev(pathname); + found = __exists_dev(pathname, sessname); mutex_unlock(&sess_lock); return found; @@ -1445,7 +1461,7 @@ static bool insert_dev_if_not_exists_devpath(const char *pathname, bool found; mutex_lock(&sess_lock); - found = __exists_dev(pathname); + found = __exists_dev(pathname, sess->sessname); if (!found) { mutex_lock(&sess->lock); list_add_tail(&dev->list, &sess->devs_list); @@ -1475,7 +1491,7 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname, struct rnbd_clt_dev *dev; int ret; - if (exists_devpath(pathname)) + if (unlikely(exists_devpath(pathname, sessname))) return ERR_PTR(-EEXIST); sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr); diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h index ed33654aa486..b193d5904050 100644 --- a/drivers/block/rnbd/rnbd-clt.h +++ b/drivers/block/rnbd/rnbd-clt.h @@ -108,7 +108,7 @@ struct rnbd_clt_dev { u32 clt_device_id; struct mutex lock; enum rnbd_clt_dev_state dev_state; - char pathname[NAME_MAX]; + char *pathname; enum rnbd_access_mode access_mode; bool read_only; bool rotational; @@ -126,7 +126,7 @@ struct rnbd_clt_dev { struct list_head list; struct gendisk *gd; struct kobject kobj; - char blk_symlink_name[NAME_MAX]; + char *blk_symlink_name; refcount_t refcount; struct work_struct unmap_on_rmmod_work; }; diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c index 106775c074d1..05ffe488ddc6 100644 --- a/drivers/block/rnbd/rnbd-srv-sysfs.c +++ b/drivers/block/rnbd/rnbd-srv-sysfs.c @@ -47,13 +47,17 @@ int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, ret = kobject_init_and_add(&dev->dev_kobj, &dev_ktype, rnbd_devs_kobj, dev_name); - if (ret) + if (ret) { + kobject_put(&dev->dev_kobj); return ret; + } dev->dev_sessions_kobj = kobject_create_and_add("sessions", &dev->dev_kobj); - if (!dev->dev_sessions_kobj) - goto put_dev_kobj; + if (!dev->dev_sessions_kobj) { + ret = -ENOMEM; + goto free_dev_kobj; + } bdev_kobj = &disk_to_dev(bdev->bd_disk)->kobj; ret = sysfs_create_link(&dev->dev_kobj, bdev_kobj, "block_dev"); @@ -64,7 +68,8 @@ int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, put_sess_kobj: kobject_put(dev->dev_sessions_kobj); -put_dev_kobj: +free_dev_kobj: + kobject_del(&dev->dev_kobj); kobject_put(&dev->dev_kobj); return ret; } @@ -120,10 +125,46 @@ static ssize_t mapping_path_show(struct kobject *kobj, static struct kobj_attribute rnbd_srv_dev_session_mapping_path_attr = __ATTR_RO(mapping_path); +static ssize_t rnbd_srv_dev_session_force_close_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return scnprintf(page, PAGE_SIZE, "Usage: echo 1 > %s\n", + attr->attr.name); +} + +static ssize_t rnbd_srv_dev_session_force_close_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct rnbd_srv_sess_dev *sess_dev; + + sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); + + if (!sysfs_streq(buf, "1")) { + rnbd_srv_err(sess_dev, "%s: invalid value: '%s'\n", + attr->attr.name, buf); + return -EINVAL; + } + + rnbd_srv_info(sess_dev, "force close requested\n"); + + /* first remove sysfs itself to avoid deadlock */ + sysfs_remove_file_self(&sess_dev->kobj, &attr->attr); + rnbd_srv_sess_dev_force_close(sess_dev); + + return count; +} + +static struct kobj_attribute rnbd_srv_dev_session_force_close_attr = + __ATTR(force_close, 0644, + rnbd_srv_dev_session_force_close_show, + rnbd_srv_dev_session_force_close_store); + static struct attribute *rnbd_srv_default_dev_sessions_attrs[] = { &rnbd_srv_dev_session_access_mode_attr.attr, &rnbd_srv_dev_session_ro_attr.attr, &rnbd_srv_dev_session_mapping_path_attr.attr, + &rnbd_srv_dev_session_force_close_attr.attr, NULL, }; @@ -145,7 +186,7 @@ static void rnbd_srv_sess_dev_release(struct kobject *kobj) struct rnbd_srv_sess_dev *sess_dev; sess_dev = container_of(kobj, struct rnbd_srv_sess_dev, kobj); - rnbd_destroy_sess_dev(sess_dev); + rnbd_destroy_sess_dev(sess_dev, sess_dev->keep_id); } static struct kobj_type rnbd_srv_sess_dev_ktype = { @@ -160,18 +201,17 @@ int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev) ret = kobject_init_and_add(&sess_dev->kobj, &rnbd_srv_sess_dev_ktype, sess_dev->dev->dev_sessions_kobj, "%s", sess_dev->sess->sessname); - if (ret) + if (ret) { + kobject_put(&sess_dev->kobj); return ret; + } ret = sysfs_create_group(&sess_dev->kobj, &rnbd_srv_default_dev_session_attr_group); - if (ret) - goto err; - - return 0; - -err: - kobject_put(&sess_dev->kobj); + if (ret) { + kobject_del(&sess_dev->kobj); + kobject_put(&sess_dev->kobj); + } return ret; } diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index e1bc8b4cd592..d1ee72ed8384 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -212,12 +212,20 @@ static void rnbd_put_srv_dev(struct rnbd_srv_dev *dev) kref_put(&dev->kref, destroy_device_cb); } -void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev) +void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id) { DECLARE_COMPLETION_ONSTACK(dc); - xa_erase(&sess_dev->sess->index_idr, sess_dev->device_id); + if (keep_id) + /* free the resources for the id but don't */ + /* allow to re-use the id itself because it */ + /* is still used by the client */ + xa_cmpxchg(&sess_dev->sess->index_idr, sess_dev->device_id, + sess_dev, NULL, 0); + else + xa_erase(&sess_dev->sess->index_idr, sess_dev->device_id); synchronize_rcu(); + sess_dev->destroy_comp = &dc; rnbd_put_sess_dev(sess_dev); wait_for_completion(&dc); /* wait for inflights to drop to zero */ @@ -328,6 +336,13 @@ static int rnbd_srv_link_ev(struct rtrs_srv *rtrs, } } +void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev) +{ + rnbd_srv_destroy_dev_session_sysfs(sess_dev); + sess_dev->keep_id = true; + +} + static int process_msg_close(struct rtrs_srv *rtrs, struct rnbd_srv_session *srv_sess, void *data, size_t datalen, const void *usr, diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h index 5a8544b5e74f..b157371c25ed 100644 --- a/drivers/block/rnbd/rnbd-srv.h +++ b/drivers/block/rnbd/rnbd-srv.h @@ -56,6 +56,7 @@ struct rnbd_srv_sess_dev { struct rnbd_srv_dev *dev; struct kobject kobj; u32 device_id; + bool keep_id; fmode_t open_flags; struct kref kref; struct completion *destroy_comp; @@ -63,6 +64,7 @@ struct rnbd_srv_sess_dev { enum rnbd_access_mode access_mode; }; +void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev); /* rnbd-srv-sysfs.c */ int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, @@ -73,6 +75,6 @@ int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); void rnbd_srv_destroy_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); int rnbd_srv_create_sysfs_files(void); void rnbd_srv_destroy_sysfs_files(void); -void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev); +void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id); #endif /* RNBD_SRV_H */ diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 52dd1efa00f9..cc6a0bc6c005 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -745,18 +745,6 @@ static const struct block_device_operations floppy_fops = { .check_events = floppy_check_events, }; -static struct kobject *floppy_find(dev_t dev, int *part, void *data) -{ - struct swim_priv *swd = data; - int drive = (*part & 3); - - if (drive >= swd->floppy_count) - return NULL; - - *part = 0; - return get_disk_and_module(swd->unit[drive].disk); -} - static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) { struct floppy_state *fs = &swd->unit[swd->floppy_count]; @@ -846,9 +834,6 @@ static int swim_floppy_init(struct swim_priv *swd) add_disk(swd->unit[drive].disk); } - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, - floppy_find, NULL, swd); - return 0; exit_put_disks: @@ -932,8 +917,6 @@ static int swim_remove(struct platform_device *dev) int drive; struct resource *res; - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - for (drive = 0; drive < swd->floppy_count; drive++) { del_gendisk(swd->unit[drive].disk); blk_cleanup_queue(swd->unit[drive].disk->queue); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 616ced8a8677..db3903e24d78 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -472,7 +472,7 @@ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) cap_str_10, cap_str_2); - set_capacity_revalidate_and_notify(vblk->disk, capacity, true); + set_capacity_and_notify(vblk->disk, capacity); } static void virtblk_config_changed_work(struct work_struct *work) @@ -600,7 +600,6 @@ static void virtblk_update_cache_mode(struct virtio_device *vdev) struct virtio_blk *vblk = vdev->priv; blk_queue_write_cache(vblk->disk->queue, writeback, false); - revalidate_disk_size(vblk->disk, true); } static const char *const virtblk_cache_types[] = { diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index a1b9df2c4ef1..b0c71d3a81a0 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -356,9 +356,7 @@ struct pending_req { }; -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : \ - get_capacity((_v)->bdev->bd_disk)) +#define vbd_sz(_v) bdev_nr_sectors((_v)->bdev) #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define xen_blkif_put(_b) \ diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 76912c584a76..9860d4842f36 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -274,6 +274,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) if (ring->xenblkd) { kthread_stop(ring->xenblkd); + ring->xenblkd = NULL; wake_up(&ring->shutdown_wq); } @@ -675,7 +676,8 @@ static int xen_blkbk_probe(struct xenbus_device *dev, /* setup back pointer */ be->blkif->be = be; - err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + err = xenbus_watch_pathfmt(dev, &be->backend_watch, NULL, + backend_changed, "%s/%s", dev->nodename, "physical-device"); if (err) goto fail; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 48629d3433b4..188e0b47534b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2153,7 +2153,7 @@ static void blkfront_closing(struct blkfront_info *info) } if (info->gd) - bdev = bdget_disk(info->gd, 0); + bdev = bdgrab(info->gd->part0); mutex_unlock(&info->mutex); @@ -2370,7 +2370,7 @@ static void blkfront_connect(struct blkfront_info *info) return; printk(KERN_INFO "Setting capacity to %Lu\n", sectors); - set_capacity_revalidate_and_notify(info->gd, sectors, true); + set_capacity_and_notify(info->gd, sectors); return; case BLKIF_STATE_SUSPENDED: @@ -2518,7 +2518,7 @@ static int blkfront_remove(struct xenbus_device *xbdev) disk = info->gd; if (disk) - bdev = bdget_disk(disk, 0); + bdev = bdgrab(disk->part0); info->xbdev = NULL; mutex_unlock(&info->mutex); @@ -2595,19 +2595,11 @@ static int blkif_open(struct block_device *bdev, fmode_t mode) static void blkif_release(struct gendisk *disk, fmode_t mode) { struct blkfront_info *info = disk->private_data; - struct block_device *bdev; struct xenbus_device *xbdev; mutex_lock(&blkfront_mutex); - - bdev = bdget_disk(disk, 0); - - if (!bdev) { - WARN(1, "Block device %s yanked out from us!\n", disk->disk_name); + if (disk->part0->bd_openers) goto out_mutex; - } - if (bdev->bd_openers) - goto out; /* * Check if we have been instructed to close. We will have @@ -2619,7 +2611,7 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) if (xbdev && xbdev->state == XenbusStateClosing) { /* pending switch to state closed */ - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + dev_info(disk_to_dev(disk), "releasing disk\n"); xlvbd_release_gendisk(info); xenbus_frontend_closed(info->xbdev); } @@ -2628,14 +2620,12 @@ static void blkif_release(struct gendisk *disk, fmode_t mode) if (!xbdev) { /* sudden device removal */ - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); + dev_info(disk_to_dev(disk), "releasing disk\n"); xlvbd_release_gendisk(info); disk->private_data = NULL; free_info(info); } -out: - bdput(bdev); out_mutex: mutex_unlock(&blkfront_mutex); } diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 0e734802ee7c..c1d20818e649 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -42,7 +42,6 @@ #include - #define Z2MINOR_COMBINED (0) #define Z2MINOR_Z2ONLY (1) #define Z2MINOR_CHIPONLY (2) @@ -50,28 +49,28 @@ #define Z2MINOR_MEMLIST2 (5) #define Z2MINOR_MEMLIST3 (6) #define Z2MINOR_MEMLIST4 (7) -#define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ +#define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ #define Z2RAM_CHUNK1024 ( Z2RAM_CHUNKSIZE >> 10 ) static DEFINE_MUTEX(z2ram_mutex); -static u_long *z2ram_map = NULL; -static u_long z2ram_size = 0; -static int z2_count = 0; -static int chip_count = 0; -static int list_count = 0; -static int current_device = -1; +static u_long *z2ram_map = NULL; +static u_long z2ram_size = 0; +static int z2_count = 0; +static int chip_count = 0; +static int list_count = 0; +static int current_device = -1; static DEFINE_SPINLOCK(z2ram_lock); -static struct gendisk *z2ram_gendisk; +static struct gendisk *z2ram_gendisk[Z2MINOR_COUNT]; static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *req = bd->rq; unsigned long start = blk_rq_pos(req) << 9; - unsigned long len = blk_rq_cur_bytes(req); + unsigned long len = blk_rq_cur_bytes(req); blk_mq_start_request(req); @@ -92,7 +91,7 @@ static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, if (len < size) size = len; - addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; + addr += z2ram_map[start >> Z2RAM_CHUNKSHIFT]; if (rq_data_dir(req) == READ) memcpy(buffer, (char *)addr, size); else @@ -106,323 +105,319 @@ static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, return BLK_STS_OK; } -static void -get_z2ram( void ) +static void get_z2ram(void) { - int i; + int i; - for ( i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++ ) - { - if ( test_bit( i, zorro_unused_z2ram ) ) - { - z2_count++; - z2ram_map[z2ram_size++] = (unsigned long)ZTWO_VADDR(Z2RAM_START) + - (i << Z2RAM_CHUNKSHIFT); - clear_bit( i, zorro_unused_z2ram ); + for (i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++) { + if (test_bit(i, zorro_unused_z2ram)) { + z2_count++; + z2ram_map[z2ram_size++] = + (unsigned long)ZTWO_VADDR(Z2RAM_START) + + (i << Z2RAM_CHUNKSHIFT); + clear_bit(i, zorro_unused_z2ram); + } } - } - return; + return; } -static void -get_chipram( void ) +static void get_chipram(void) { - while ( amiga_chip_avail() > ( Z2RAM_CHUNKSIZE * 4 ) ) - { - chip_count++; - z2ram_map[ z2ram_size ] = - (u_long)amiga_chip_alloc( Z2RAM_CHUNKSIZE, "z2ram" ); + while (amiga_chip_avail() > (Z2RAM_CHUNKSIZE * 4)) { + chip_count++; + z2ram_map[z2ram_size] = + (u_long) amiga_chip_alloc(Z2RAM_CHUNKSIZE, "z2ram"); - if ( z2ram_map[ z2ram_size ] == 0 ) - { - break; + if (z2ram_map[z2ram_size] == 0) { + break; + } + + z2ram_size++; } - z2ram_size++; - } - - return; + return; } static int z2_open(struct block_device *bdev, fmode_t mode) { - int device; - int max_z2_map = ( Z2RAM_SIZE / Z2RAM_CHUNKSIZE ) * - sizeof( z2ram_map[0] ); - int max_chip_map = ( amiga_chip_size / Z2RAM_CHUNKSIZE ) * - sizeof( z2ram_map[0] ); - int rc = -ENOMEM; + int device; + int max_z2_map = (Z2RAM_SIZE / Z2RAM_CHUNKSIZE) * sizeof(z2ram_map[0]); + int max_chip_map = (amiga_chip_size / Z2RAM_CHUNKSIZE) * + sizeof(z2ram_map[0]); + int rc = -ENOMEM; - device = MINOR(bdev->bd_dev); + device = MINOR(bdev->bd_dev); - mutex_lock(&z2ram_mutex); - if ( current_device != -1 && current_device != device ) - { - rc = -EBUSY; - goto err_out; - } + mutex_lock(&z2ram_mutex); + if (current_device != -1 && current_device != device) { + rc = -EBUSY; + goto err_out; + } - if ( current_device == -1 ) - { - z2_count = 0; - chip_count = 0; - list_count = 0; - z2ram_size = 0; + if (current_device == -1) { + z2_count = 0; + chip_count = 0; + list_count = 0; + z2ram_size = 0; - /* Use a specific list entry. */ - if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { - int index = device - Z2MINOR_MEMLIST1 + 1; - unsigned long size, paddr, vaddr; + /* Use a specific list entry. */ + if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { + int index = device - Z2MINOR_MEMLIST1 + 1; + unsigned long size, paddr, vaddr; - if (index >= m68k_realnum_memory) { - printk( KERN_ERR DEVICE_NAME - ": no such entry in z2ram_map\n" ); - goto err_out; - } + if (index >= m68k_realnum_memory) { + printk(KERN_ERR DEVICE_NAME + ": no such entry in z2ram_map\n"); + goto err_out; + } - paddr = m68k_memory[index].addr; - size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE-1); + paddr = m68k_memory[index].addr; + size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE - 1); #ifdef __powerpc__ - /* FIXME: ioremap doesn't build correct memory tables. */ - { - vfree(vmalloc (size)); - } + /* FIXME: ioremap doesn't build correct memory tables. */ + { + vfree(vmalloc(size)); + } - vaddr = (unsigned long)ioremap_wt(paddr, size); + vaddr = (unsigned long)ioremap_wt(paddr, size); #else - vaddr = (unsigned long)z_remap_nocache_nonser(paddr, size); + vaddr = + (unsigned long)z_remap_nocache_nonser(paddr, size); #endif - z2ram_map = - kmalloc_array(size / Z2RAM_CHUNKSIZE, - sizeof(z2ram_map[0]), - GFP_KERNEL); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; + z2ram_map = + kmalloc_array(size / Z2RAM_CHUNKSIZE, + sizeof(z2ram_map[0]), GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + while (size) { + z2ram_map[z2ram_size++] = vaddr; + size -= Z2RAM_CHUNKSIZE; + vaddr += Z2RAM_CHUNKSIZE; + list_count++; + } + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK List Entry %d Memory\n", + list_count * Z2RAM_CHUNK1024, index); + } else + switch (device) { + case Z2MINOR_COMBINED: + + z2ram_map = + kmalloc(max_z2_map + max_chip_map, + GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_z2ram(); + get_chipram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", + z2_count * Z2RAM_CHUNK1024, + chip_count * Z2RAM_CHUNK1024, + (z2_count + + chip_count) * Z2RAM_CHUNK1024); + + break; + + case Z2MINOR_Z2ONLY: + z2ram_map = kmalloc(max_z2_map, GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_z2ram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK of Zorro II RAM\n", + z2_count * Z2RAM_CHUNK1024); + + break; + + case Z2MINOR_CHIPONLY: + z2ram_map = kmalloc(max_chip_map, GFP_KERNEL); + if (z2ram_map == NULL) { + printk(KERN_ERR DEVICE_NAME + ": cannot get mem for z2ram_map\n"); + goto err_out; + } + + get_chipram(); + + if (z2ram_size != 0) + printk(KERN_INFO DEVICE_NAME + ": using %iK Chip RAM\n", + chip_count * Z2RAM_CHUNK1024); + + break; + + default: + rc = -ENODEV; + goto err_out; + + break; + } + + if (z2ram_size == 0) { + printk(KERN_NOTICE DEVICE_NAME + ": no unused ZII/Chip RAM found\n"); + goto err_out_kfree; } - while (size) { - z2ram_map[ z2ram_size++ ] = vaddr; - size -= Z2RAM_CHUNKSIZE; - vaddr += Z2RAM_CHUNKSIZE; - list_count++; - } - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK List Entry %d Memory\n", - list_count * Z2RAM_CHUNK1024, index ); - } else - - switch ( device ) - { - case Z2MINOR_COMBINED: - - z2ram_map = kmalloc( max_z2_map + max_chip_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } - - get_z2ram(); - get_chipram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", - z2_count * Z2RAM_CHUNK1024, - chip_count * Z2RAM_CHUNK1024, - ( z2_count + chip_count ) * Z2RAM_CHUNK1024 ); - - break; - - case Z2MINOR_Z2ONLY: - z2ram_map = kmalloc( max_z2_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } - - get_z2ram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK of Zorro II RAM\n", - z2_count * Z2RAM_CHUNK1024 ); - - break; - - case Z2MINOR_CHIPONLY: - z2ram_map = kmalloc( max_chip_map, GFP_KERNEL ); - if ( z2ram_map == NULL ) - { - printk( KERN_ERR DEVICE_NAME - ": cannot get mem for z2ram_map\n" ); - goto err_out; - } - - get_chipram(); - - if ( z2ram_size != 0 ) - printk( KERN_INFO DEVICE_NAME - ": using %iK Chip RAM\n", - chip_count * Z2RAM_CHUNK1024 ); - - break; - - default: - rc = -ENODEV; - goto err_out; - - break; + current_device = device; + z2ram_size <<= Z2RAM_CHUNKSHIFT; + set_capacity(z2ram_gendisk[device], z2ram_size >> 9); } - if ( z2ram_size == 0 ) - { - printk( KERN_NOTICE DEVICE_NAME - ": no unused ZII/Chip RAM found\n" ); - goto err_out_kfree; - } - - current_device = device; - z2ram_size <<= Z2RAM_CHUNKSHIFT; - set_capacity(z2ram_gendisk, z2ram_size >> 9); - } - - mutex_unlock(&z2ram_mutex); - return 0; + mutex_unlock(&z2ram_mutex); + return 0; err_out_kfree: - kfree(z2ram_map); + kfree(z2ram_map); err_out: - mutex_unlock(&z2ram_mutex); - return rc; + mutex_unlock(&z2ram_mutex); + return rc; } -static void -z2_release(struct gendisk *disk, fmode_t mode) +static void z2_release(struct gendisk *disk, fmode_t mode) { - mutex_lock(&z2ram_mutex); - if ( current_device == -1 ) { - mutex_unlock(&z2ram_mutex); - return; - } - mutex_unlock(&z2ram_mutex); - /* - * FIXME: unmap memory - */ + mutex_lock(&z2ram_mutex); + if (current_device == -1) { + mutex_unlock(&z2ram_mutex); + return; + } + mutex_unlock(&z2ram_mutex); + /* + * FIXME: unmap memory + */ } -static const struct block_device_operations z2_fops = -{ - .owner = THIS_MODULE, - .open = z2_open, - .release = z2_release, +static const struct block_device_operations z2_fops = { + .owner = THIS_MODULE, + .open = z2_open, + .release = z2_release, }; -static struct kobject *z2_find(dev_t dev, int *part, void *data) -{ - *part = 0; - return get_disk_and_module(z2ram_gendisk); -} - -static struct request_queue *z2_queue; static struct blk_mq_tag_set tag_set; static const struct blk_mq_ops z2_mq_ops = { - .queue_rq = z2_queue_rq, + .queue_rq = z2_queue_rq, }; -static int __init -z2_init(void) +static int z2ram_register_disk(int minor) { - int ret; + struct request_queue *q; + struct gendisk *disk; - if (!MACH_IS_AMIGA) - return -ENODEV; + disk = alloc_disk(1); + if (!disk) + return -ENOMEM; - ret = -EBUSY; - if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) - goto err; + q = blk_mq_init_queue(&tag_set); + if (IS_ERR(q)) { + put_disk(disk); + return PTR_ERR(q); + } - ret = -ENOMEM; - z2ram_gendisk = alloc_disk(1); - if (!z2ram_gendisk) - goto out_disk; + disk->major = Z2RAM_MAJOR; + disk->first_minor = minor; + disk->fops = &z2_fops; + if (minor) + sprintf(disk->disk_name, "z2ram%d", minor); + else + sprintf(disk->disk_name, "z2ram"); + disk->queue = q; - z2_queue = blk_mq_init_sq_queue(&tag_set, &z2_mq_ops, 16, - BLK_MQ_F_SHOULD_MERGE); - if (IS_ERR(z2_queue)) { - ret = PTR_ERR(z2_queue); - z2_queue = NULL; - goto out_queue; - } + z2ram_gendisk[minor] = disk; + add_disk(disk); + return 0; +} - z2ram_gendisk->major = Z2RAM_MAJOR; - z2ram_gendisk->first_minor = 0; - z2ram_gendisk->fops = &z2_fops; - sprintf(z2ram_gendisk->disk_name, "z2ram"); +static int __init z2_init(void) +{ + int ret, i; - z2ram_gendisk->queue = z2_queue; - add_disk(z2ram_gendisk); - blk_register_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT, THIS_MODULE, - z2_find, NULL, NULL); + if (!MACH_IS_AMIGA) + return -ENODEV; - return 0; + if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) + return -EBUSY; -out_queue: - put_disk(z2ram_gendisk); -out_disk: - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); -err: - return ret; + tag_set.ops = &z2_mq_ops; + tag_set.nr_hw_queues = 1; + tag_set.nr_maps = 1; + tag_set.queue_depth = 16; + tag_set.numa_node = NUMA_NO_NODE; + tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ret = blk_mq_alloc_tag_set(&tag_set); + if (ret) + goto out_unregister_blkdev; + + for (i = 0; i < Z2MINOR_COUNT; i++) { + ret = z2ram_register_disk(i); + if (ret && i == 0) + goto out_free_tagset; + } + + return 0; + +out_free_tagset: + blk_mq_free_tag_set(&tag_set); +out_unregister_blkdev: + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); + return ret; } static void __exit z2_exit(void) { - int i, j; - blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT); - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); - del_gendisk(z2ram_gendisk); - put_disk(z2ram_gendisk); - blk_cleanup_queue(z2_queue); - blk_mq_free_tag_set(&tag_set); + int i, j; - if ( current_device != -1 ) - { - i = 0; + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); - for ( j = 0 ; j < z2_count; j++ ) - { - set_bit( i++, zorro_unused_z2ram ); + for (i = 0; i < Z2MINOR_COUNT; i++) { + del_gendisk(z2ram_gendisk[i]); + blk_cleanup_queue(z2ram_gendisk[i]->queue); + put_disk(z2ram_gendisk[i]); + } + blk_mq_free_tag_set(&tag_set); + + if (current_device != -1) { + i = 0; + + for (j = 0; j < z2_count; j++) { + set_bit(i++, zorro_unused_z2ram); + } + + for (j = 0; j < chip_count; j++) { + if (z2ram_map[i]) { + amiga_chip_free((void *)z2ram_map[i++]); + } + } + + if (z2ram_map != NULL) { + kfree(z2ram_map); + } } - for ( j = 0 ; j < chip_count; j++ ) - { - if ( z2ram_map[ i ] ) - { - amiga_chip_free( (void *) z2ram_map[ i++ ] ); - } - } - - if ( z2ram_map != NULL ) - { - kfree( z2ram_map ); - } - } - - return; -} + return; +} module_init(z2_init); module_exit(z2_exit); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 66a33e418940..e2933cb7a82a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -403,13 +403,10 @@ static void reset_bdev(struct zram *zram) return; bdev = zram->bdev; - if (zram->old_block_size) - set_blocksize(bdev, zram->old_block_size); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); /* hope filp_close flush all of IO */ filp_close(zram->backing_dev, NULL); zram->backing_dev = NULL; - zram->old_block_size = 0; zram->bdev = NULL; zram->disk->fops = &zram_devops; kvfree(zram->bitmap); @@ -454,7 +451,7 @@ static ssize_t backing_dev_store(struct device *dev, struct file *backing_dev = NULL; struct inode *inode; struct address_space *mapping; - unsigned int bitmap_sz, old_block_size = 0; + unsigned int bitmap_sz; unsigned long nr_pages, *bitmap = NULL; struct block_device *bdev = NULL; int err; @@ -509,14 +506,8 @@ static ssize_t backing_dev_store(struct device *dev, goto out; } - old_block_size = block_size(bdev); - err = set_blocksize(bdev, PAGE_SIZE); - if (err) - goto out; - reset_bdev(zram); - zram->old_block_size = old_block_size; zram->bdev = bdev; zram->backing_dev = backing_dev; zram->bitmap = bitmap; @@ -1710,8 +1701,8 @@ static void zram_reset_device(struct zram *zram) disksize = zram->disksize; zram->disksize = 0; - set_capacity(zram->disk, 0); - part_stat_set_all(&zram->disk->part0, 0); + set_capacity_and_notify(zram->disk, 0); + part_stat_set_all(zram->disk->part0, 0); up_write(&zram->init_lock); /* I/O operation under all of CPU are done so let's free */ @@ -1756,9 +1747,7 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - - revalidate_disk_size(zram->disk, true); + set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; @@ -1786,15 +1775,12 @@ static ssize_t reset_store(struct device *dev, return -EINVAL; zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; + bdev = zram->disk->part0; mutex_lock(&bdev->bd_mutex); /* Do not reset an active device or claimed device */ if (bdev->bd_openers || zram->claim) { mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return -EBUSY; } @@ -1805,8 +1791,6 @@ static ssize_t reset_store(struct device *dev, /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - revalidate_disk_size(zram->disk, true); - bdput(bdev); mutex_lock(&bdev->bd_mutex); zram->claim = false; @@ -1992,16 +1976,11 @@ static int zram_add(void) static int zram_remove(struct zram *zram) { - struct block_device *bdev; - - bdev = bdget_disk(zram->disk, 0); - if (!bdev) - return -ENOMEM; + struct block_device *bdev = zram->disk->part0; mutex_lock(&bdev->bd_mutex); if (bdev->bd_openers || zram->claim) { mutex_unlock(&bdev->bd_mutex); - bdput(bdev); return -EBUSY; } @@ -2013,7 +1992,6 @@ static int zram_remove(struct zram *zram) /* Make sure all the pending I/O are finished */ fsync_bdev(bdev); zram_reset_device(zram); - bdput(bdev); pr_info("Removed device: %s\n", zram->disk->disk_name); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 9cabcbb13fd9..419a7e8281ee 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -119,7 +119,6 @@ struct zram { bool wb_limit_enable; u64 bd_wb_limit; struct block_device *bdev; - unsigned int old_block_size; unsigned long *bitmap; unsigned long nr_pages; #endif diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c index 9f7ed1fcd428..626dedd110cb 100644 --- a/drivers/bus/mips_cdmm.c +++ b/drivers/bus/mips_cdmm.c @@ -559,10 +559,8 @@ static void mips_cdmm_bus_discover(struct mips_cdmm_bus *bus) dev_set_name(&dev->dev, "cdmm%u-%u", cpu, id); ++id; ret = device_register(&dev->dev); - if (ret) { + if (ret) put_device(&dev->dev); - kfree(dev); - } } } diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 0c271b9e3c5b..8f0e52a71493 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2996,13 +2996,15 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, * SCSI-II devices are not required to support * READ_CD, so let's try switching block size */ - /* FIXME: switch back again... */ - ret = cdrom_switch_blocksize(cdi, blocksize); - if (ret) - goto out; + if (blocksize != CD_FRAMESIZE) { + ret = cdrom_switch_blocksize(cdi, blocksize); + if (ret) + goto out; + } cgc->sshdr = NULL; ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1); - ret |= cdrom_switch_blocksize(cdi, blocksize); + if (blocksize != CD_FRAMESIZE) + ret |= cdrom_switch_blocksize(cdi, CD_FRAMESIZE); } if (!ret && copy_to_user(arg, cgc->buffer, blocksize)) ret = -EFAULT; diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fbdda9938039..6fb0c76bfbf8 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -503,6 +503,70 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, } EXPORT_SYMBOL_GPL(vmbus_establish_gpadl); +/** + * request_arr_init - Allocates memory for the requestor array. Each slot + * keeps track of the next available slot in the array. Initially, each + * slot points to the next one (as in a Linked List). The last slot + * does not point to anything, so its value is U64_MAX by default. + * @size The size of the array + */ +static u64 *request_arr_init(u32 size) +{ + int i; + u64 *req_arr; + + req_arr = kcalloc(size, sizeof(u64), GFP_KERNEL); + if (!req_arr) + return NULL; + + for (i = 0; i < size - 1; i++) + req_arr[i] = i + 1; + + /* Last slot (no more available slots) */ + req_arr[i] = U64_MAX; + + return req_arr; +} + +/* + * vmbus_alloc_requestor - Initializes @rqstor's fields. + * Index 0 is the first free slot + * @size: Size of the requestor array + */ +static int vmbus_alloc_requestor(struct vmbus_requestor *rqstor, u32 size) +{ + u64 *rqst_arr; + unsigned long *bitmap; + + rqst_arr = request_arr_init(size); + if (!rqst_arr) + return -ENOMEM; + + bitmap = bitmap_zalloc(size, GFP_KERNEL); + if (!bitmap) { + kfree(rqst_arr); + return -ENOMEM; + } + + rqstor->req_arr = rqst_arr; + rqstor->req_bitmap = bitmap; + rqstor->size = size; + rqstor->next_request_id = 0; + spin_lock_init(&rqstor->req_lock); + + return 0; +} + +/* + * vmbus_free_requestor - Frees memory allocated for @rqstor + * @rqstor: Pointer to the requestor struct + */ +static void vmbus_free_requestor(struct vmbus_requestor *rqstor) +{ + kfree(rqstor->req_arr); + bitmap_free(rqstor->req_bitmap); +} + static int __vmbus_open(struct vmbus_channel *newchannel, void *userdata, u32 userdatalen, void (*onchannelcallback)(void *context), void *context) @@ -523,6 +587,12 @@ static int __vmbus_open(struct vmbus_channel *newchannel, if (newchannel->state != CHANNEL_OPEN_STATE) return -EINVAL; + /* Create and init requestor */ + if (newchannel->rqstor_size) { + if (vmbus_alloc_requestor(&newchannel->requestor, newchannel->rqstor_size)) + return -ENOMEM; + } + newchannel->state = CHANNEL_OPENING_STATE; newchannel->onchannel_callback = onchannelcallback; newchannel->channel_callback_context = context; @@ -626,6 +696,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel, error_clean_ring: hv_ringbuffer_cleanup(&newchannel->outbound); hv_ringbuffer_cleanup(&newchannel->inbound); + vmbus_free_requestor(&newchannel->requestor); newchannel->state = CHANNEL_OPEN_STATE; return err; } @@ -808,6 +879,9 @@ static int vmbus_close_internal(struct vmbus_channel *channel) channel->ringbuffer_gpadlhandle = 0; } + if (!ret) + vmbus_free_requestor(&channel->requestor); + return ret; } @@ -888,7 +962,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, /* in 8-bytes granularity */ desc.offset8 = sizeof(struct vmpacket_descriptor) >> 3; desc.len8 = (u16)(packetlen_aligned >> 3); - desc.trans_id = requestid; + desc.trans_id = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ bufferlist[0].iov_base = &desc; bufferlist[0].iov_len = sizeof(struct vmpacket_descriptor); @@ -897,7 +971,7 @@ int vmbus_sendpacket(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - return hv_ringbuffer_write(channel, bufferlist, num_vecs); + return hv_ringbuffer_write(channel, bufferlist, num_vecs, requestid); } EXPORT_SYMBOL(vmbus_sendpacket); @@ -939,7 +1013,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, desc.flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc.dataoffset8 = descsize >> 3; /* in 8-bytes granularity */ desc.length8 = (u16)(packetlen_aligned >> 3); - desc.transactionid = requestid; + desc.transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc.reserved = 0; desc.rangecount = pagecount; @@ -956,7 +1030,7 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - return hv_ringbuffer_write(channel, bufferlist, 3); + return hv_ringbuffer_write(channel, bufferlist, 3, requestid); } EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); @@ -983,7 +1057,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; desc->dataoffset8 = desc_size >> 3; /* in 8-bytes granularity */ desc->length8 = (u16)(packetlen_aligned >> 3); - desc->transactionid = requestid; + desc->transactionid = VMBUS_RQST_ERROR; /* will be updated in hv_ringbuffer_write() */ desc->reserved = 0; desc->rangecount = 1; @@ -994,7 +1068,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - return hv_ringbuffer_write(channel, bufferlist, 3); + return hv_ringbuffer_write(channel, bufferlist, 3, requestid); } EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc); @@ -1042,3 +1116,91 @@ int vmbus_recvpacket_raw(struct vmbus_channel *channel, void *buffer, buffer_actual_len, requestid, true); } EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw); + +/* + * vmbus_next_request_id - Returns a new request id. It is also + * the index at which the guest memory address is stored. + * Uses a spin lock to avoid race conditions. + * @rqstor: Pointer to the requestor struct + * @rqst_add: Guest memory address to be stored in the array + */ +u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr) +{ + unsigned long flags; + u64 current_id; + const struct vmbus_channel *channel = + container_of(rqstor, const struct vmbus_channel, requestor); + + /* Check rqstor has been initialized */ + if (!channel->rqstor_size) + return VMBUS_NO_RQSTOR; + + spin_lock_irqsave(&rqstor->req_lock, flags); + current_id = rqstor->next_request_id; + + /* Requestor array is full */ + if (current_id >= rqstor->size) { + spin_unlock_irqrestore(&rqstor->req_lock, flags); + return VMBUS_RQST_ERROR; + } + + rqstor->next_request_id = rqstor->req_arr[current_id]; + rqstor->req_arr[current_id] = rqst_addr; + + /* The already held spin lock provides atomicity */ + bitmap_set(rqstor->req_bitmap, current_id, 1); + + spin_unlock_irqrestore(&rqstor->req_lock, flags); + + /* + * Cannot return an ID of 0, which is reserved for an unsolicited + * message from Hyper-V. + */ + return current_id + 1; +} +EXPORT_SYMBOL_GPL(vmbus_next_request_id); + +/* + * vmbus_request_addr - Returns the memory address stored at @trans_id + * in @rqstor. Uses a spin lock to avoid race conditions. + * @rqstor: Pointer to the requestor struct + * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's + * next request id. + */ +u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id) +{ + unsigned long flags; + u64 req_addr; + const struct vmbus_channel *channel = + container_of(rqstor, const struct vmbus_channel, requestor); + + /* Check rqstor has been initialized */ + if (!channel->rqstor_size) + return VMBUS_NO_RQSTOR; + + /* Hyper-V can send an unsolicited message with ID of 0 */ + if (!trans_id) + return trans_id; + + spin_lock_irqsave(&rqstor->req_lock, flags); + + /* Data corresponding to trans_id is stored at trans_id - 1 */ + trans_id--; + + /* Invalid trans_id */ + if (trans_id >= rqstor->size || !test_bit(trans_id, rqstor->req_bitmap)) { + spin_unlock_irqrestore(&rqstor->req_lock, flags); + return VMBUS_RQST_ERROR; + } + + req_addr = rqstor->req_arr[trans_id]; + rqstor->req_arr[trans_id] = rqstor->next_request_id; + rqstor->next_request_id = trans_id; + + /* The already held spin lock provides atomicity */ + bitmap_clear(rqstor->req_bitmap, trans_id, 1); + + spin_unlock_irqrestore(&rqstor->req_lock, flags); + return req_addr; +} +EXPORT_SYMBOL_GPL(vmbus_request_addr); diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index eb56e09ae15f..8c471823a5af 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1198,6 +1198,7 @@ static void free_balloon_pages(struct hv_dynmem_device *dm, __ClearPageOffline(pg); __free_page(pg); dm->num_pages_ballooned--; + adjust_managed_page_count(pg, 1); } } @@ -1238,8 +1239,10 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, split_page(pg, get_order(alloc_unit << PAGE_SHIFT)); /* mark all pages offline */ - for (j = 0; j < (1 << get_order(alloc_unit << PAGE_SHIFT)); j++) + for (j = 0; j < alloc_unit; j++) { __SetPageOffline(pg + j); + adjust_managed_page_count(pg + j, -1); + } bl_resp->range_count++; bl_resp->range_array[i].finfo.start_page = diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 40e2b9f91163..9416e09ebd58 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -179,21 +179,21 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); int hv_ringbuffer_write(struct vmbus_channel *channel, - const struct kvec *kv_list, u32 kv_count); + const struct kvec *kv_list, u32 kv_count, + u64 requestid); int hv_ringbuffer_read(struct vmbus_channel *channel, void *buffer, u32 buflen, u32 *buffer_actual_len, u64 *requestid, bool raw); /* - * The Maximum number of channels (16348) is determined by the size of the + * The Maximum number of channels (16384) is determined by the size of the * interrupt page, which is HV_HYP_PAGE_SIZE. 1/2 of HV_HYP_PAGE_SIZE is to * send endpoint interrupts, and the other is to receive endpoint interrupts. */ #define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3) /* The value here must be in multiple of 32 */ -/* TODO: Need to make this configurable */ #define MAX_NUM_CHANNELS_SUPPORTED 256 #define MAX_CHANNEL_RELIDS \ diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 356e22159e83..35833d4d1a1d 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -248,7 +248,8 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) /* Write to the ring buffer. */ int hv_ringbuffer_write(struct vmbus_channel *channel, - const struct kvec *kv_list, u32 kv_count) + const struct kvec *kv_list, u32 kv_count, + u64 requestid) { int i; u32 bytes_avail_towrite; @@ -258,6 +259,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, u64 prev_indices; unsigned long flags; struct hv_ring_buffer_info *outring_info = &channel->outbound; + struct vmpacket_descriptor *desc = kv_list[0].iov_base; + u64 rqst_id = VMBUS_NO_RQSTOR; if (channel->rescind) return -ENODEV; @@ -300,6 +303,23 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, kv_list[i].iov_len); } + /* + * Allocate the request ID after the data has been copied into the + * ring buffer. Once this request ID is allocated, the completion + * path could find the data and free it. + */ + + if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) { + rqst_id = vmbus_next_request_id(&channel->requestor, requestid); + if (rqst_id == VMBUS_RQST_ERROR) { + spin_unlock_irqrestore(&outring_info->ring_lock, flags); + pr_err("No request id available\n"); + return -EAGAIN; + } + } + desc = hv_get_ring_buffer(outring_info) + old_write; + desc->trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? requestid : rqst_id; + /* Set previous packet start */ prev_indices = hv_get_ring_bufferindices(outring_info); @@ -319,8 +339,13 @@ int hv_ringbuffer_write(struct vmbus_channel *channel, hv_signal_on_write(old_write, channel); - if (channel->rescind) + if (channel->rescind) { + if (rqst_id != VMBUS_NO_RQSTOR) { + /* Reclaim request ID to avoid leak of IDs */ + vmbus_request_addr(&channel->requestor, rqst_id); + } return -ENODEV; + } return 0; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 4fad3e6745e5..502f8cd95f6d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -55,7 +55,7 @@ int vmbus_interrupt; /* * Boolean to control whether to report panic messages over Hyper-V. * - * It can be set via /proc/sys/kernel/hyperv/record_panic_msg + * It can be set via /proc/sys/kernel/hyperv_record_panic_msg */ static int sysctl_record_panic_msg = 1; @@ -156,6 +156,7 @@ static u32 channel_conn_id(struct vmbus_channel *channel, { u8 monitor_group = channel_monitor_group(channel); u8 monitor_offset = channel_monitor_offset(channel); + return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; } @@ -550,6 +551,7 @@ static ssize_t vendor_show(struct device *dev, char *buf) { struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->vendor_id); } static DEVICE_ATTR_RO(vendor); @@ -559,6 +561,7 @@ static ssize_t device_show(struct device *dev, char *buf) { struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->device_id); } static DEVICE_ATTR_RO(device); @@ -1384,6 +1387,24 @@ static struct kmsg_dumper hv_kmsg_dumper = { .dump = hv_kmsg_dump, }; +static void hv_kmsg_dump_register(void) +{ + int ret; + + hv_panic_page = hv_alloc_hyperv_zeroed_page(); + if (!hv_panic_page) { + pr_err("Hyper-V: panic message page memory allocation failed\n"); + return; + } + + ret = kmsg_dump_register(&hv_kmsg_dumper); + if (ret) { + pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); + hv_free_hyperv_page((unsigned long)hv_panic_page); + hv_panic_page = NULL; + } +} + static struct ctl_table_header *hv_ctl_table_hdr; /* @@ -1474,21 +1495,8 @@ static int vmbus_bus_init(void) * capability is supported by the hypervisor. */ hv_get_crash_ctl(hyperv_crash_ctl); - if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { - hv_panic_page = (void *)hv_alloc_hyperv_zeroed_page(); - if (hv_panic_page) { - ret = kmsg_dump_register(&hv_kmsg_dumper); - if (ret) { - pr_err("Hyper-V: kmsg dump register " - "error 0x%x\n", ret); - hv_free_hyperv_page( - (unsigned long)hv_panic_page); - hv_panic_page = NULL; - } - } else - pr_err("Hyper-V: panic message page memory " - "allocation failed"); - } + if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) + hv_kmsg_dump_register(); register_die_notifier(&hyperv_die_block); } @@ -1812,7 +1820,7 @@ static ssize_t channel_pending_show(struct vmbus_channel *channel, channel_pending(channel, vmbus_connection.monitor_pages[1])); } -static VMBUS_CHAN_ATTR(pending, S_IRUGO, channel_pending_show, NULL); +static VMBUS_CHAN_ATTR(pending, 0444, channel_pending_show, NULL); static ssize_t channel_latency_show(struct vmbus_channel *channel, char *buf) @@ -1821,19 +1829,19 @@ static ssize_t channel_latency_show(struct vmbus_channel *channel, channel_latency(channel, vmbus_connection.monitor_pages[1])); } -static VMBUS_CHAN_ATTR(latency, S_IRUGO, channel_latency_show, NULL); +static VMBUS_CHAN_ATTR(latency, 0444, channel_latency_show, NULL); static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) { return sprintf(buf, "%llu\n", channel->interrupts); } -static VMBUS_CHAN_ATTR(interrupts, S_IRUGO, channel_interrupts_show, NULL); +static VMBUS_CHAN_ATTR(interrupts, 0444, channel_interrupts_show, NULL); static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) { return sprintf(buf, "%llu\n", channel->sig_events); } -static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL); +static VMBUS_CHAN_ATTR(events, 0444, channel_events_show, NULL); static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, char *buf) @@ -1872,7 +1880,7 @@ static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel, { return sprintf(buf, "%u\n", channel->offermsg.monitorid); } -static VMBUS_CHAN_ATTR(monitor_id, S_IRUGO, subchannel_monitor_id_show, NULL); +static VMBUS_CHAN_ATTR(monitor_id, 0444, subchannel_monitor_id_show, NULL); static ssize_t subchannel_id_show(struct vmbus_channel *channel, char *buf) @@ -2377,7 +2385,7 @@ static int vmbus_bus_suspend(struct device *dev) * We wait here until the completion of any channel * offers that are currently in progress. */ - msleep(1); + usleep_range(1000, 2000); } mutex_lock(&vmbus_connection.channel_mutex); diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 430b29e0abdb..aefd74c0d862 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -902,65 +902,14 @@ static int init_irq (ide_hwif_t *hwif) return 1; } -static int ata_lock(dev_t dev, void *data) +static void ata_probe(dev_t dev) { - /* FIXME: we want to pin hwif down */ - return 0; + request_module("ide-disk"); + request_module("ide-cd"); + request_module("ide-tape"); + request_module("ide-floppy"); } -static struct kobject *ata_probe(dev_t dev, int *part, void *data) -{ - ide_hwif_t *hwif = data; - int unit = *part >> PARTN_BITS; - ide_drive_t *drive = hwif->devices[unit]; - - if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) - return NULL; - - if (drive->media == ide_disk) - request_module("ide-disk"); - if (drive->media == ide_cdrom || drive->media == ide_optical) - request_module("ide-cd"); - if (drive->media == ide_tape) - request_module("ide-tape"); - if (drive->media == ide_floppy) - request_module("ide-floppy"); - - return NULL; -} - -static struct kobject *exact_match(dev_t dev, int *part, void *data) -{ - struct gendisk *p = data; - *part &= (1 << PARTN_BITS) - 1; - return &disk_to_dev(p)->kobj; -} - -static int exact_lock(dev_t dev, void *data) -{ - struct gendisk *p = data; - - if (!get_disk_and_module(p)) - return -1; - return 0; -} - -void ide_register_region(struct gendisk *disk) -{ - blk_register_region(MKDEV(disk->major, disk->first_minor), - disk->minors, NULL, exact_match, exact_lock, disk); -} - -EXPORT_SYMBOL_GPL(ide_register_region); - -void ide_unregister_region(struct gendisk *disk) -{ - blk_unregister_region(MKDEV(disk->major, disk->first_minor), - disk->minors); -} - -EXPORT_SYMBOL_GPL(ide_unregister_region); - void ide_init_disk(struct gendisk *disk, ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; @@ -999,7 +948,7 @@ static int hwif_init(ide_hwif_t *hwif) return 0; } - if (register_blkdev(hwif->major, hwif->name)) + if (__register_blkdev(hwif->major, hwif->name, ata_probe)) return 0; if (!hwif->sg_max_nents) @@ -1021,8 +970,6 @@ static int hwif_init(ide_hwif_t *hwif) goto out; } - blk_register_region(MKDEV(hwif->major, 0), MAX_DRIVES << PARTN_BITS, - THIS_MODULE, ata_probe, ata_lock, hwif); return 1; out: @@ -1611,7 +1558,6 @@ static void ide_unregister(ide_hwif_t *hwif) /* * Remove us from the kernel's knowledge */ - blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<sg_table); unregister_blkdev(hwif->major, hwif->name); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 6f26634b22bb..88b96437b22e 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -1822,7 +1822,6 @@ static void ide_tape_remove(ide_drive_t *drive) ide_proc_unregister_driver(drive, tape->driver); device_del(&tape->dev); - ide_unregister_region(tape->disk); mutex_lock(&idetape_ref_mutex); put_device(&tape->dev); @@ -2026,7 +2025,6 @@ static int ide_tape_probe(ide_drive_t *drive) "n%s", tape->name); g->fops = &idetape_block_ops; - ide_register_region(g); return 0; diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c index 97c68731406b..1dddba11e721 100644 --- a/drivers/lightnvm/pblk-core.c +++ b/drivers/lightnvm/pblk-core.c @@ -1869,6 +1869,10 @@ void pblk_gen_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv, struct pblk_line_ws *line_ws; line_ws = mempool_alloc(&pblk->gen_ws_pool, gfp_mask); + if (!line_ws) { + pblk_err(pblk, "pblk: could not allocate memory\n"); + return; + } line_ws->pblk = pblk; line_ws->line = line; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 214326383145..85b1f2a9b72d 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -475,7 +475,7 @@ struct search { unsigned int read_dirty_data:1; unsigned int cache_missed:1; - struct hd_struct *part; + struct block_device *part; unsigned long start_time; struct btree_op op; @@ -1073,7 +1073,7 @@ struct detached_dev_io_private { unsigned long start_time; bio_end_io_t *bi_end_io; void *bi_private; - struct hd_struct *part; + struct block_device *part; }; static void detached_dev_end_io(struct bio *bio) @@ -1230,8 +1230,9 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, if (dc->io_disable) return -EIO; - - return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); + if (!dc->bdev->bd_disk->fops->ioctl) + return -ENOTTY; + return dc->bdev->bd_disk->fops->ioctl(dc->bdev, mode, cmd, arg); } void bch_cached_dev_request_init(struct cached_dev *dc) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 46a00134a36a..0e06d721cd8e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1114,9 +1114,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) static void cached_dev_detach_finish(struct work_struct *w) { struct cached_dev *dc = container_of(w, struct cached_dev, detach); - struct closure cl; - - closure_init_stack(&cl); BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)); BUG_ON(refcount_read(&dc->count)); @@ -1130,12 +1127,6 @@ static void cached_dev_detach_finish(struct work_struct *w) dc->writeback_thread = NULL; } - memset(&dc->sb.set_uuid, 0, 16); - SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); - - bch_write_bdev_super(dc, &cl); - closure_sync(&cl); - mutex_lock(&bch_register_lock); calc_cached_dev_sectors(dc->disk.c); @@ -1408,7 +1399,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) q->limits.raid_partial_stripes_expensive; ret = bcache_device_init(&dc->disk, block_size, - dc->bdev->bd_part->nr_sects - dc->sb.data_offset, + bdev_nr_sectors(dc->bdev) - dc->sb.data_offset, dc->bdev, &bcache_cached_ops); if (ret) return ret; @@ -1447,8 +1438,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto err; err = "error creating kobject"; - if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, - "bcache")) + if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache")) goto err; if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; @@ -2342,9 +2332,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto err; } - if (kobject_add(&ca->kobj, - &part_to_dev(bdev->bd_part)->kobj, - "bcache")) { + if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) { err = "error calling kobject_add"; ret = -ENOMEM; goto out; @@ -2383,38 +2371,38 @@ kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); -static bool bch_is_open_backing(struct block_device *bdev) +static bool bch_is_open_backing(dev_t dev) { struct cache_set *c, *tc; struct cached_dev *dc, *t; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) list_for_each_entry_safe(dc, t, &c->cached_devs, list) - if (dc->bdev == bdev) + if (dc->bdev->bd_dev == dev) return true; list_for_each_entry_safe(dc, t, &uncached_devices, list) - if (dc->bdev == bdev) + if (dc->bdev->bd_dev == dev) return true; return false; } -static bool bch_is_open_cache(struct block_device *bdev) +static bool bch_is_open_cache(dev_t dev) { struct cache_set *c, *tc; list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { struct cache *ca = c->cache; - if (ca->bdev == bdev) + if (ca->bdev->bd_dev == dev) return true; } return false; } -static bool bch_is_open(struct block_device *bdev) +static bool bch_is_open(dev_t dev) { - return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); + return bch_is_open_cache(dev) || bch_is_open_backing(dev); } struct async_reg_args { @@ -2538,9 +2526,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, sb); if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { - bdev = lookup_bdev(strim(path)); + dev_t dev; + mutex_lock(&bch_register_lock); - if (!IS_ERR(bdev) && bch_is_open(bdev)) + if (lookup_bdev(strim(path), &dev) == 0 && + bch_is_open(dev)) err = "device already registered"; else err = "device busy"; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 3c74996978da..a129e4d2707c 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -705,6 +705,15 @@ static int bch_writeback_thread(void *arg) * bch_cached_dev_detach(). */ if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) { + struct closure cl; + + closure_init_stack(&cl); + memset(&dc->sb.set_uuid, 0, 16); + SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE); + + bch_write_bdev_super(dc, &cl); + closure_sync(&cl); + up_write(&dc->writeback_lock); break; } diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 805f2ea72782..ca443ca98eb7 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -100,19 +100,12 @@ struct mapped_device { */ struct workqueue_struct *wq; - /* - * freeze/thaw support require holding onto a super block - */ - struct super_block *frozen_sb; - /* forced geometry settings */ struct hd_geometry geometry; /* kobject and completion */ struct dm_kobject_holder kobj_holder; - struct block_device *bdev; - struct dm_stats stats; /* for blk-mq request-based DM support */ diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 56b723d012ac..23c38777e8f6 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -700,8 +700,7 @@ static void rs_set_capacity(struct raid_set *rs) { struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); - set_capacity(gendisk, rs->md.array_sectors); - revalidate_disk_size(gendisk, true); + set_capacity_and_notify(gendisk, rs->md.array_sectors); } /* diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 729a72ec30cc..13b4385f4d5a 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -397,7 +397,7 @@ static int map_request(struct dm_rq_target_io *tio) } /* The target has remapped the I/O so dispatch it */ - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), + trace_block_rq_remap(clone, disk_devt(dm_disk(md)), blk_rq_pos(rq)); ret = dm_dispatch_clone_request(clone, rq); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index d1b2de85f0ab..c6d63d82c9fe 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -349,16 +349,9 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, dev_t dm_get_dev_t(const char *path) { dev_t dev; - struct block_device *bdev; - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) + if (lookup_bdev(path, &dev)) dev = name_to_dev_t(path); - else { - dev = bdev->bd_dev; - bdput(bdev); - } - return dev; } EXPORT_SYMBOL_GPL(dm_get_dev_t); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 69ff0f8ee88b..1aea5e089d08 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -571,7 +571,10 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode, } } - r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); + if (!bdev->bd_disk->fops->ioctl) + r = -ENOTTY; + else + r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); out: dm_unprepare_ioctl(md, srcu_idx); return r; @@ -1275,8 +1278,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) break; case DM_MAPIO_REMAPPED: /* the bio has been remapped so dispatch it */ - trace_block_bio_remap(clone->bi_disk->queue, clone, - bio_dev(io->orig_bio), sector); + trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector); ret = submit_bio_noacct(clone); break; case DM_MAPIO_KILL: @@ -1421,18 +1423,12 @@ static int __send_empty_flush(struct clone_info *ci) */ bio_init(&flush_bio, NULL, 0); flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; + flush_bio.bi_disk = ci->io->md->disk; + bio_associate_blkg(&flush_bio); + ci->bio = &flush_bio; ci->sector_count = 0; - /* - * Empty flush uses a statically initialized bio, as the base for - * cloning. However, blkg association requires that a bdev is - * associated with a gendisk, which doesn't happen until the bdev is - * opened. So, blkg association is done at issue time of the flush - * rather than when the device is created in alloc_dev(). - */ - bio_set_dev(ci->bio, ci->io->md->bdev); - BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); @@ -1612,12 +1608,12 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * (by eliminating DM's splitting and just using bio_split) */ part_stat_lock(); - __dm_part_stat_sub(&dm_disk(md)->part0, + __dm_part_stat_sub(dm_disk(md)->part0, sectors[op_stat_group(bio_op(bio))], ci.sector_count); part_stat_unlock(); bio_chain(b, bio); - trace_block_split(md->queue, b, bio->bi_iter.bi_sector); + trace_block_split(b, bio->bi_iter.bi_sector); ret = submit_bio_noacct(bio); break; } @@ -1753,11 +1749,6 @@ static void cleanup_mapped_device(struct mapped_device *md) cleanup_srcu_struct(&md->io_barrier); - if (md->bdev) { - bdput(md->bdev); - md->bdev = NULL; - } - mutex_destroy(&md->suspend_lock); mutex_destroy(&md->type_lock); mutex_destroy(&md->table_devices_lock); @@ -1849,10 +1840,6 @@ static struct mapped_device *alloc_dev(int minor) if (!md->wq) goto bad; - md->bdev = bdget_disk(md->disk, 0); - if (!md->bdev) - goto bad; - dm_stats_init(&md->stats); /* Populate the mapping, nobody knows we exist yet */ @@ -1977,8 +1964,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (size != dm_get_size(md)) memset(&md->geometry, 0, sizeof(md->geometry)); - set_capacity(md->disk, size); - bd_set_nr_sectors(md->bdev, size); + set_capacity_and_notify(md->disk, size); dm_table_event_callback(t, event_callback, md); @@ -2419,7 +2405,7 @@ EXPORT_SYMBOL_GPL(dm_put); static bool md_in_flight_bios(struct mapped_device *md) { int cpu; - struct hd_struct *part = &dm_disk(md)->part0; + struct block_device *part = dm_disk(md)->part0; long sum = 0; for_each_possible_cpu(cpu) { @@ -2554,27 +2540,19 @@ static int lock_fs(struct mapped_device *md) { int r; - WARN_ON(md->frozen_sb); + WARN_ON(test_bit(DMF_FROZEN, &md->flags)); - md->frozen_sb = freeze_bdev(md->bdev); - if (IS_ERR(md->frozen_sb)) { - r = PTR_ERR(md->frozen_sb); - md->frozen_sb = NULL; - return r; - } - - set_bit(DMF_FROZEN, &md->flags); - - return 0; + r = freeze_bdev(md->disk->part0); + if (!r) + set_bit(DMF_FROZEN, &md->flags); + return r; } static void unlock_fs(struct mapped_device *md) { if (!test_bit(DMF_FROZEN, &md->flags)) return; - - thaw_bdev(md->bdev, md->frozen_sb); - md->frozen_sb = NULL; + thaw_bdev(md->disk->part0); clear_bit(DMF_FROZEN, &md->flags); } diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 4aaf4820b6f6..7fbd41e156c9 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -581,8 +581,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) process_metadata_update(mddev, msg); break; case CHANGE_CAPACITY: - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); break; case RESYNCING: set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); @@ -664,9 +663,27 @@ static void recv_daemon(struct md_thread *thread) * Takes the lock on the TOKEN lock resource so no other * node can communicate while the operation is underway. */ -static int lock_token(struct md_cluster_info *cinfo, bool mddev_locked) +static int lock_token(struct md_cluster_info *cinfo) { - int error, set_bit = 0; + int error; + + error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); + if (error) { + pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", + __func__, __LINE__, error); + } else { + /* Lock the receive sequence */ + mutex_lock(&cinfo->recv_mutex); + } + return error; +} + +/* lock_comm() + * Sets the MD_CLUSTER_SEND_LOCK bit to lock the send channel. + */ +static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked) +{ + int rv, set_bit = 0; struct mddev *mddev = cinfo->mddev; /* @@ -677,34 +694,19 @@ static int lock_token(struct md_cluster_info *cinfo, bool mddev_locked) */ if (mddev_locked && !test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state)) { - error = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, + rv = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); - WARN_ON_ONCE(error); + WARN_ON_ONCE(rv); md_wakeup_thread(mddev->thread); set_bit = 1; } - error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX); - if (set_bit) - clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); - if (error) - pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n", - __func__, __LINE__, error); - - /* Lock the receive sequence */ - mutex_lock(&cinfo->recv_mutex); - return error; -} - -/* lock_comm() - * Sets the MD_CLUSTER_SEND_LOCK bit to lock the send channel. - */ -static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked) -{ wait_event(cinfo->wait, !test_and_set_bit(MD_CLUSTER_SEND_LOCK, &cinfo->state)); - - return lock_token(cinfo, mddev_locked); + rv = lock_token(cinfo); + if (set_bit) + clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); + return rv; } static void unlock_comm(struct md_cluster_info *cinfo) @@ -784,9 +786,11 @@ static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg, { int ret; - lock_comm(cinfo, mddev_locked); - ret = __sendmsg(cinfo, cmsg); - unlock_comm(cinfo); + ret = lock_comm(cinfo, mddev_locked); + if (!ret) { + ret = __sendmsg(cinfo, cmsg); + unlock_comm(cinfo); + } return ret; } @@ -1061,7 +1065,7 @@ static int metadata_update_start(struct mddev *mddev) return 0; } - ret = lock_token(cinfo, 1); + ret = lock_token(cinfo); clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state); return ret; } @@ -1255,7 +1259,10 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors) int raid_slot = -1; md_update_sb(mddev, 1); - lock_comm(cinfo, 1); + if (lock_comm(cinfo, 1)) { + pr_err("%s: lock_comm failed\n", __func__); + return; + } memset(&cmsg, 0, sizeof(cmsg)); cmsg.type = cpu_to_le32(METADATA_UPDATED); @@ -1296,13 +1303,10 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors) if (ret) pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", __func__, __LINE__); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); } else { /* revert to previous sectors */ ret = mddev->pers->resize(mddev, old_dev_sectors); - if (!ret) - revalidate_disk_size(mddev->gendisk, true); ret = __sendmsg(cinfo, &cmsg); if (ret) pr_err("%s:%d: failed to send METADATA_UPDATED msg\n", @@ -1407,7 +1411,8 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev) cmsg.type = cpu_to_le32(NEWDISK); memcpy(cmsg.uuid, uuid, 16); cmsg.raid_slot = cpu_to_le32(rdev->desc_nr); - lock_comm(cinfo, 1); + if (lock_comm(cinfo, 1)) + return -EAGAIN; ret = __sendmsg(cinfo, &cmsg); if (ret) { unlock_comm(cinfo); diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index 5ab22069b5be..68cac7d19278 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -200,9 +200,8 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev) "copied raid_disks doesn't match mddev->raid_disks"); rcu_assign_pointer(mddev->private, newconf); md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); - set_capacity(mddev->gendisk, mddev->array_sectors); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); mddev_resume(mddev); - revalidate_disk_size(mddev->gendisk, true); kfree_rcu(oldconf, rcu); return 0; } @@ -258,8 +257,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) bio_endio(bio); } else { if (mddev->gendisk) - trace_block_bio_remap(bio->bi_disk->queue, - bio, disk_devt(mddev->gendisk), + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, bio); mddev_check_write_zeroes(mddev, bio); diff --git a/drivers/md/md.c b/drivers/md/md.c index 0037c6ecab65..ca409428b4fc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -464,7 +464,7 @@ struct md_io { bio_end_io_t *orig_bi_end_io; void *orig_bi_private; unsigned long start_time; - struct hd_struct *part; + struct block_device *part; }; static void md_end_io(struct bio *bio) @@ -639,7 +639,7 @@ static void md_submit_flush_data(struct work_struct *ws) * could wait for this and below md_handle_request could wait for those * bios because of suspend check */ - mddev->last_flush = mddev->start_flush; + mddev->prev_flush_start = mddev->start_flush; mddev->flush_bio = NULL; wake_up(&mddev->sb_wait); @@ -660,13 +660,17 @@ static void md_submit_flush_data(struct work_struct *ws) */ bool md_flush_request(struct mddev *mddev, struct bio *bio) { - ktime_t start = ktime_get_boottime(); + ktime_t req_start = ktime_get_boottime(); spin_lock_irq(&mddev->lock); + /* flush requests wait until ongoing flush completes, + * hence coalescing all the pending requests. + */ wait_event_lock_irq(mddev->sb_wait, !mddev->flush_bio || - ktime_after(mddev->last_flush, start), + ktime_before(req_start, mddev->prev_flush_start), mddev->lock); - if (!ktime_after(mddev->last_flush, start)) { + /* new request after previous flush is completed */ + if (ktime_after(req_start, mddev->prev_flush_start)) { WARN_ON(mddev->flush_bio); mddev->flush_bio = bio; bio = NULL; @@ -2414,7 +2418,6 @@ EXPORT_SYMBOL(md_integrity_add_rdev); static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) { char b[BDEVNAME_SIZE]; - struct kobject *ko; int err; /* prevent duplicates */ @@ -2477,9 +2480,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; /* failure here is OK */ - err = sysfs_create_link(&rdev->kobj, ko, "block"); + err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block"); rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); rdev->sysfs_unack_badblocks = sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks"); @@ -5355,10 +5357,9 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len) if (!err) { mddev->array_sectors = sectors; - if (mddev->pers) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); - } + if (mddev->pers) + set_capacity_and_notify(mddev->gendisk, + mddev->array_sectors); } mddev_unlock(mddev); return err ?: len; @@ -5765,11 +5766,12 @@ static int md_alloc(dev_t dev, char *name) return error; } -static struct kobject *md_probe(dev_t dev, int *part, void *data) +static void md_probe(dev_t dev) { + if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512) + return; if (create_on_open) md_alloc(dev, NULL); - return NULL; } static int add_named_array(const char *val, const struct kernel_param *kp) @@ -6107,8 +6109,7 @@ int do_md_run(struct mddev *mddev) md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); clear_bit(MD_NOT_READY, &mddev->flags); mddev->changed = 1; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); @@ -6423,10 +6424,9 @@ static int do_md_stop(struct mddev *mddev, int mode, if (rdev->raid_disk >= 0) sysfs_unlink_rdev(mddev, rdev); - set_capacity(disk, 0); + set_capacity_and_notify(disk, 0); mutex_unlock(&mddev->open_mutex); mddev->changed = 1; - revalidate_disk_size(disk, true); if (mddev->ro) mddev->ro = 0; @@ -6535,7 +6535,7 @@ static void autorun_devices(int part) break; } - md_probe(dev, NULL, NULL); + md_probe(dev); mddev = mddev_find(dev); if (!mddev || !mddev->gendisk) { if (mddev) @@ -6948,8 +6948,10 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev) goto busy; kick_rdev: - if (mddev_is_clustered(mddev)) - md_cluster_ops->remove_disk(mddev, rdev); + if (mddev_is_clustered(mddev)) { + if (md_cluster_ops->remove_disk(mddev, rdev)) + goto busy; + } md_kick_rdev_from_array(rdev); set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); @@ -7257,8 +7259,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) if (mddev_is_clustered(mddev)) md_cluster_ops->update_size(mddev, old_dev_sectors); else if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); + set_capacity_and_notify(mddev->gendisk, + mddev->array_sectors); } } return rv; @@ -7278,6 +7280,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) return -EINVAL; if (mddev->sync_thread || test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || + test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) || mddev->reshape_position != MaxSector) return -EBUSY; @@ -7480,7 +7483,6 @@ static inline bool md_ioctl_valid(unsigned int cmd) { switch (cmd) { case ADD_NEW_DISK: - case BLKROSET: case GET_ARRAY_INFO: case GET_BITMAP_FILE: case GET_DISK_INFO: @@ -7507,7 +7509,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, int err = 0; void __user *argp = (void __user *)arg; struct mddev *mddev = NULL; - int ro; bool did_set_md_closing = false; if (!md_ioctl_valid(cmd)) @@ -7590,8 +7591,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags)); - set_bit(MD_CLOSING, &mddev->flags); + if (test_and_set_bit(MD_CLOSING, &mddev->flags)) { + mutex_unlock(&mddev->open_mutex); + err = -EBUSY; + goto out; + } did_set_md_closing = true; mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); @@ -7687,35 +7691,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, goto unlock; } break; - - case BLKROSET: - if (get_user(ro, (int __user *)(arg))) { - err = -EFAULT; - goto unlock; - } - err = -EINVAL; - - /* if the bdev is going readonly the value of mddev->ro - * does not matter, no writes are coming - */ - if (ro) - goto unlock; - - /* are we are already prepared for writes? */ - if (mddev->ro != 1) - goto unlock; - - /* transitioning to readauto need only happen for - * arrays that call md_write_start - */ - if (mddev->pers) { - err = restart_array(mddev); - if (err == 0) { - mddev->ro = 2; - set_disk_ro(mddev->gendisk, 0); - } - } - goto unlock; } /* @@ -7809,6 +7784,36 @@ static int md_compat_ioctl(struct block_device *bdev, fmode_t mode, } #endif /* CONFIG_COMPAT */ +static int md_set_read_only(struct block_device *bdev, bool ro) +{ + struct mddev *mddev = bdev->bd_disk->private_data; + int err; + + err = mddev_lock(mddev); + if (err) + return err; + + if (!mddev->raid_disks && !mddev->external) { + err = -ENODEV; + goto out_unlock; + } + + /* + * Transitioning to read-auto need only happen for arrays that call + * md_write_start and which are not ready for writes yet. + */ + if (!ro && mddev->ro == 1 && mddev->pers) { + err = restart_array(mddev); + if (err) + goto out_unlock; + mddev->ro = 2; + } + +out_unlock: + mddev_unlock(mddev); + return err; +} + static int md_open(struct block_device *bdev, fmode_t mode) { /* @@ -7886,6 +7891,7 @@ const struct block_device_operations md_fops = #endif .getgeo = md_getgeo, .check_events = md_check_events, + .set_read_only = md_set_read_only, }; static int md_thread(void *arg) @@ -8445,7 +8451,7 @@ static int is_mddev_idle(struct mddev *mddev, int init) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_disk; - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and @@ -9015,10 +9021,9 @@ void md_do_sync(struct md_thread *thread) mddev_lock_nointr(mddev); md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0)); mddev_unlock(mddev); - if (!mddev_is_clustered(mddev)) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk_size(mddev->gendisk, true); - } + if (!mddev_is_clustered(mddev)) + set_capacity_and_notify(mddev->gendisk, + mddev->array_sectors); } spin_lock(&mddev->lock); @@ -9547,18 +9552,15 @@ static int __init md_init(void) if (!md_rdev_misc_wq) goto err_rdev_misc_wq; - if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) + ret = __register_blkdev(MD_MAJOR, "md", md_probe); + if (ret < 0) goto err_md; - if ((ret = register_blkdev(0, "mdp")) < 0) + ret = __register_blkdev(0, "mdp", md_probe); + if (ret < 0) goto err_mdp; mdp_major = ret; - blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE, - md_probe, NULL, NULL); - blk_register_region(MKDEV(mdp_major, 0), 1UL<raid_disks != le32_to_cpu(sb->raid_disks)) - update_raid_disks(mddev, le32_to_cpu(sb->raid_disks)); + if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) { + ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks)); + if (ret) + pr_warn("md: updating array disks failed. %d\n", ret); + } /* * Since mddev->delta_disks has already updated in update_raid_disks, @@ -9825,9 +9830,6 @@ static __exit void md_exit(void) struct list_head *tmp; int delay = 1; - blk_unregister_region(MKDEV(MD_MAJOR,0), 512); - blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); - unregister_blkdev(MD_MAJOR,"md"); unregister_blkdev(mdp_major, "mdp"); unregister_reboot_notifier(&md_notifier); diff --git a/drivers/md/md.h b/drivers/md/md.h index 2175a5ac4f7c..34070ab30a8a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -495,9 +495,9 @@ struct mddev { */ struct bio *flush_bio; atomic_t flush_pending; - ktime_t start_flush, last_flush; /* last_flush is when the last completed - * flush was started. - */ + ktime_t start_flush, prev_flush_start; /* prev_flush_start is when the previous completed + * flush was started. + */ struct work_struct flush_work; struct work_struct event_work; /* used by dm to report failure event */ mempool_t *serial_info_pool; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 35843df15b5e..67f157f2525d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -508,8 +508,8 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) bio_chain(discard_bio, bio); bio_clone_blkg_association(discard_bio, bio); if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(rdev->bdev), - discard_bio, disk_devt(mddev->gendisk), + trace_block_bio_remap(discard_bio, + disk_devt(mddev->gendisk), bio->bi_iter.bi_sector); submit_bio_noacct(discard_bio); } @@ -581,8 +581,8 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) tmp_dev->data_offset; if (mddev->gendisk) - trace_block_bio_remap(bio->bi_disk->queue, bio, - disk_devt(mddev->gendisk), bio_sector); + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), + bio_sector); mddev_check_writesame(mddev, bio); mddev_check_write_zeroes(mddev, bio); submit_bio_noacct(bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 960d854c07f8..c0347997f6ff 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1305,8 +1305,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r1_bio; if (mddev->gendisk) - trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, - disk_devt(mddev->gendisk), r1_bio->sector); + trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), + r1_bio->sector); submit_bio_noacct(read_bio); } @@ -1517,8 +1517,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, atomic_inc(&r1_bio->remaining); if (mddev->gendisk) - trace_block_bio_remap(mbio->bi_disk->queue, - mbio, disk_devt(mddev->gendisk), + trace_block_bio_remap(mbio, disk_devt(mddev->gendisk), r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_disk = (void *)conf->mirrors[i].rdev; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3b598a3cb462..c5d88ef6a45c 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1128,7 +1128,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, struct md_rdev *err_rdev = NULL; gfp_t gfp = GFP_NOIO; - if (r10_bio->devs[slot].rdev) { + if (slot >= 0 && r10_bio->devs[slot].rdev) { /* * This is an error retry, but we cannot * safely dereference the rdev in the r10_bio, @@ -1201,8 +1201,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r10_bio; if (mddev->gendisk) - trace_block_bio_remap(read_bio->bi_disk->queue, - read_bio, disk_devt(mddev->gendisk), + trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), r10_bio->sector); submit_bio_noacct(read_bio); return; @@ -1251,8 +1250,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_private = r10_bio; if (conf->mddev->gendisk) - trace_block_bio_remap(mbio->bi_disk->queue, - mbio, disk_devt(conf->mddev->gendisk), + trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk), r10_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ mbio->bi_disk = (void *)rdev; @@ -1493,6 +1491,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) r10_bio->mddev = mddev; r10_bio->sector = bio->bi_iter.bi_sector; r10_bio->state = 0; + r10_bio->read_slot = -1; memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies); if (bio_data_dir(bio) == READ) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 39343479ac2a..3a90cc0e43ca 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1222,9 +1222,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); if (conf->mddev->gendisk) - trace_block_bio_remap(bi->bi_disk->queue, - bi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + trace_block_bio_remap(bi, + disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, bi); else @@ -1272,9 +1272,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) if (op == REQ_OP_DISCARD) rbi->bi_vcnt = 0; if (conf->mddev->gendisk) - trace_block_bio_remap(rbi->bi_disk->queue, - rbi, disk_devt(conf->mddev->gendisk), - sh->dev[i].sector); + trace_block_bio_remap(rbi, + disk_devt(conf->mddev->gendisk), + sh->dev[i].sector); if (should_defer && op_is_write(op)) bio_list_add(&pending_bios, rbi); else @@ -5468,8 +5468,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) spin_unlock_irq(&conf->device_lock); if (mddev->gendisk) - trace_block_bio_remap(align_bi->bi_disk->queue, - align_bi, disk_devt(mddev->gendisk), + trace_block_bio_remap(align_bi, disk_devt(mddev->gendisk), raid_bio->bi_iter.bi_sector); submit_bio_noacct(align_bi); return 1; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0c05f77f9b21..fb8e12d590a1 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -298,38 +298,10 @@ static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo) return ret; } -static int blktrans_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk); - int ret = -ENXIO; - - if (!dev) - return ret; - - mutex_lock(&dev->lock); - - if (!dev->mtd) - goto unlock; - - switch (cmd) { - case BLKFLSBUF: - ret = dev->tr->flush ? dev->tr->flush(dev) : 0; - break; - default: - ret = -ENOTTY; - } -unlock: - mutex_unlock(&dev->lock); - blktrans_dev_put(dev); - return ret; -} - static const struct block_device_operations mtd_block_ops = { .owner = THIS_MODULE, .open = blktrans_open, .release = blktrans_release, - .ioctl = blktrans_ioctl, .getgeo = blktrans_getgeo, }; diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index c3e2098372f2..38b6aa849c63 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -120,8 +120,8 @@ int get_tree_mtd(struct fs_context *fc, struct fs_context *fc)) { #ifdef CONFIG_BLOCK - struct block_device *bdev; - int ret, major; + dev_t dev; + int ret; #endif int mtdnr; @@ -169,20 +169,15 @@ int get_tree_mtd(struct fs_context *fc, /* try the old way - the hack where we allowed users to mount * /dev/mtdblock$(n) but didn't actually _use_ the blockdev */ - bdev = lookup_bdev(fc->source); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); + ret = lookup_bdev(fc->source, &dev); + if (ret) { errorf(fc, "MTD: Couldn't look up '%s': %d", fc->source, ret); return ret; } pr_debug("MTDSB: lookup_bdev() returned 0\n"); - major = MAJOR(bdev->bd_dev); - mtdnr = MINOR(bdev->bd_dev); - bdput(bdev); - - if (major == MTD_BLOCK_MAJOR) - return mtd_get_sb_by_nr(fc, mtdnr, fill_super); + if (MAJOR(dev) == MTD_BLOCK_MAJOR) + return mtd_get_sb_by_nr(fc, MINOR(dev), fill_super); #endif /* CONFIG_BLOCK */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 95ab871d8d59..a0e0d8a83681 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -1145,7 +1145,7 @@ static struct sock *chtls_recv_sock(struct sock *lsk, fl6.daddr = ip6h->saddr; fl6.fl6_dport = inet_rsk(oreq)->ir_rmt_port; fl6.fl6_sport = htons(inet_rsk(oreq)->ir_num); - security_req_classify_flow(oreq, flowi6_to_flowi(&fl6)); + security_req_classify_flow(oreq, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(sock_net(lsk), lsk, &fl6, NULL); if (IS_ERR(dst)) goto free_sk; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index a0f338cf1424..2a87cfa27ac0 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -847,6 +847,19 @@ struct nvsp_message { #define NETVSC_XDP_HDRM 256 +#define NETVSC_MIN_OUT_MSG_SIZE (sizeof(struct vmpacket_descriptor) + \ + sizeof(struct nvsp_message)) +#define NETVSC_MIN_IN_MSG_SIZE sizeof(struct vmpacket_descriptor) + +/* Estimated requestor size: + * out_ring_size/min_out_msg_size + in_ring_size/min_in_msg_size + */ +static inline u32 netvsc_rqstor_size(unsigned long ringbytes) +{ + return ringbytes / NETVSC_MIN_OUT_MSG_SIZE + + ringbytes / NETVSC_MIN_IN_MSG_SIZE; +} + #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \ (offsetof(struct vmtransfer_page_packet_header, ranges) + \ (rng_cnt) * sizeof(struct vmtransfer_page_range)) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index fa8341f8359a..2350342b961f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -50,7 +50,7 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) vmbus_sendpacket(dev->channel, init_pkt, sizeof(struct nvsp_message), - (unsigned long)init_pkt, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); } @@ -163,7 +163,7 @@ static void netvsc_revoke_recv_buf(struct hv_device *device, ret = vmbus_sendpacket(device->channel, revoke_packet, sizeof(struct nvsp_message), - (unsigned long)revoke_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); /* If the failure is because the channel is rescinded; * ignore the failure since we cannot send on a rescinded @@ -213,7 +213,7 @@ static void netvsc_revoke_send_buf(struct hv_device *device, ret = vmbus_sendpacket(device->channel, revoke_packet, sizeof(struct nvsp_message), - (unsigned long)revoke_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); /* If the failure is because the channel is rescinded; @@ -557,7 +557,7 @@ static int negotiate_nvsp_ver(struct hv_device *device, ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message), - (unsigned long)init_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); return ret; @@ -614,7 +614,7 @@ static int netvsc_connect_vsp(struct hv_device *device, /* Send the init request */ ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message), - (unsigned long)init_packet, + VMBUS_RQST_ID_NO_RESPONSE, VM_PKT_DATA_INBAND, 0); if (ret != 0) goto cleanup; @@ -695,10 +695,19 @@ static void netvsc_send_tx_complete(struct net_device *ndev, const struct vmpacket_descriptor *desc, int budget) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct sk_buff *skb; u16 q_idx = 0; int queue_sends; + u64 cmd_rqst; + + cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id); + if (cmd_rqst == VMBUS_RQST_ERROR) { + netdev_err(ndev, "Incorrect transaction id\n"); + return; + } + + skb = (struct sk_buff *)(unsigned long)cmd_rqst; /* Notify the layer above us */ if (likely(skb)) { @@ -1520,6 +1529,7 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device, netvsc_poll, NAPI_POLL_WEIGHT); /* Open the channel */ + device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); ret = vmbus_open(device->channel, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, net_device->chan_table); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 2c2b55c32a7a..598713c0d5a8 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1173,6 +1173,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) /* Set the channel before opening.*/ nvchan->channel = new_sc; + new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes); ret = vmbus_open(new_sc, netvsc_ring_bytes, netvsc_ring_bytes, NULL, 0, netvsc_channel_cb, nvchan); @@ -1224,6 +1225,11 @@ int rndis_set_subchannel(struct net_device *ndev, return -EIO; } + /* Check that number of allocated sub channel is within the expected range */ + if (init_packet->msg.v5_msg.subchn_comp.num_subchannels > nvdev->num_chn - 1) { + netdev_err(ndev, "invalid number of allocated sub channel\n"); + return -EINVAL; + } nvdev->num_chn = 1 + init_packet->msg.v5_msg.subchn_comp.num_subchannels; diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index c33e2c81635f..410b318e57fb 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -49,7 +49,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb, rt = dst_cache_get_ip4(cache, &fl.saddr); if (!rt) { - security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + security_sk_classify_flow(sock, flowi4_to_flowi_common(&fl)); if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, fl.saddr, RT_SCOPE_HOST))) { endpoint->src4.s_addr = 0; @@ -129,7 +129,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, dst = dst_cache_get_ip6(cache, &fl.saddr); if (!dst) { - security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); + security_sk_classify_flow(sock, flowi6_to_flowi_common(&fl)); if (unlikely(!ipv6_addr_any(&fl.saddr) && !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { endpoint->src6 = fl.saddr = in6addr_any; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index f1c1624cec8f..6f10e0998f1c 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -557,12 +557,14 @@ static int xen_register_credit_watch(struct xenbus_device *dev, return -ENOMEM; snprintf(node, maxlen, "%s/rate", dev->nodename); vif->credit_watch.node = node; + vif->credit_watch.will_handle = NULL; vif->credit_watch.callback = xen_net_rate_changed; err = register_xenbus_watch(&vif->credit_watch); if (err) { pr_err("Failed to set watcher %s\n", vif->credit_watch.node); kfree(node); vif->credit_watch.node = NULL; + vif->credit_watch.will_handle = NULL; vif->credit_watch.callback = NULL; } return err; @@ -609,6 +611,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev, snprintf(node, maxlen, "%s/request-multicast-control", dev->otherend); vif->mcast_ctrl_watch.node = node; + vif->mcast_ctrl_watch.will_handle = NULL; vif->mcast_ctrl_watch.callback = xen_mcast_ctrl_changed; err = register_xenbus_watch(&vif->mcast_ctrl_watch); if (err) { @@ -616,6 +619,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev, vif->mcast_ctrl_watch.node); kfree(node); vif->mcast_ctrl_watch.node = NULL; + vif->mcast_ctrl_watch.will_handle = NULL; vif->mcast_ctrl_watch.callback = NULL; } return err; @@ -820,7 +824,7 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, hotplug_status_changed, "%s/%s", dev->nodename, "hotplug-status"); if (!err) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 9a270e49df17..ce1b61519441 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -85,7 +85,7 @@ static LIST_HEAD(nvme_subsystems); static DEFINE_MUTEX(nvme_subsystems_lock); static DEFINE_IDA(nvme_instance_ida); -static dev_t nvme_chr_devt; +static dev_t nvme_ctrl_base_chr_devt; static struct class *nvme_class; static struct class *nvme_subsys_class; @@ -93,16 +93,6 @@ static void nvme_put_subsystem(struct nvme_subsystem *subsys); static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, unsigned nsid); -static void nvme_update_bdev_size(struct gendisk *disk) -{ - struct block_device *bdev = bdget_disk(disk, 0); - - if (bdev) { - bd_set_nr_sectors(bdev, get_capacity(disk)); - bdput(bdev); - } -} - /* * Prepare a queue for teardown. * @@ -119,8 +109,7 @@ static void nvme_set_queue_dying(struct nvme_ns *ns) blk_set_queue_dying(ns->queue); blk_mq_unquiesce_queue(ns->queue); - set_capacity(ns->disk, 0); - nvme_update_bdev_size(ns->disk); + set_capacity_and_notify(ns->disk, 0); } static void nvme_queue_scan(struct nvme_ctrl *ctrl) @@ -148,6 +137,38 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_try_sched_reset); +static void nvme_failfast_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = container_of(to_delayed_work(work), + struct nvme_ctrl, failfast_work); + + if (ctrl->state != NVME_CTRL_CONNECTING) + return; + + set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); + dev_info(ctrl->device, "failfast expired\n"); + nvme_kick_requeue_lists(ctrl); +} + +static inline void nvme_start_failfast_work(struct nvme_ctrl *ctrl) +{ + if (!ctrl->opts || ctrl->opts->fast_io_fail_tmo == -1) + return; + + schedule_delayed_work(&ctrl->failfast_work, + ctrl->opts->fast_io_fail_tmo * HZ); +} + +static inline void nvme_stop_failfast_work(struct nvme_ctrl *ctrl) +{ + if (!ctrl->opts) + return; + + cancel_delayed_work_sync(&ctrl->failfast_work); + clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); +} + + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -433,8 +454,17 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, } spin_unlock_irqrestore(&ctrl->lock, flags); - if (changed && ctrl->state == NVME_CTRL_LIVE) + if (!changed) + return false; + + if (ctrl->state == NVME_CTRL_LIVE) { + if (old_state == NVME_CTRL_CONNECTING) + nvme_stop_failfast_work(ctrl); nvme_kick_requeue_lists(ctrl); + } else if (ctrl->state == NVME_CTRL_CONNECTING && + old_state == NVME_CTRL_RESETTING) { + nvme_start_failfast_work(ctrl); + } return changed; } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); @@ -518,29 +548,49 @@ static inline void nvme_clear_nvme_request(struct request *req) } } -struct request *nvme_alloc_request(struct request_queue *q, - struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) +static inline unsigned int nvme_req_op(struct nvme_command *cmd) { - unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; - struct request *req; + return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; +} - if (qid == NVME_QID_ANY) { - req = blk_mq_alloc_request(q, op, flags); - } else { - req = blk_mq_alloc_request_hctx(q, op, flags, - qid ? qid - 1 : 0); - } - if (IS_ERR(req)) - return req; +static inline void nvme_init_request(struct request *req, + struct nvme_command *cmd) +{ + if (req->q->queuedata) + req->timeout = NVME_IO_TIMEOUT; + else /* no queuedata implies admin queue */ + req->timeout = NVME_ADMIN_TIMEOUT; req->cmd_flags |= REQ_FAILFAST_DRIVER; nvme_clear_nvme_request(req); nvme_req(req)->cmd = cmd; +} +struct request *nvme_alloc_request(struct request_queue *q, + struct nvme_command *cmd, blk_mq_req_flags_t flags) +{ + struct request *req; + + req = blk_mq_alloc_request(q, nvme_req_op(cmd), flags); + if (!IS_ERR(req)) + nvme_init_request(req, cmd); return req; } EXPORT_SYMBOL_GPL(nvme_alloc_request); +struct request *nvme_alloc_request_qid(struct request_queue *q, + struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid) +{ + struct request *req; + + req = blk_mq_alloc_request_hctx(q, nvme_req_op(cmd), flags, + qid ? qid - 1 : 0); + if (!IS_ERR(req)) + nvme_init_request(req, cmd); + return req; +} +EXPORT_SYMBOL_GPL(nvme_alloc_request_qid); + static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) { struct nvme_command c; @@ -897,11 +947,15 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, struct request *req; int ret; - req = nvme_alloc_request(q, cmd, flags, qid); + if (qid == NVME_QID_ANY) + req = nvme_alloc_request(q, cmd, flags); + else + req = nvme_alloc_request_qid(q, cmd, flags, qid); if (IS_ERR(req)) return PTR_ERR(req); - req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + if (timeout) + req->timeout = timeout; if (buffer && bufflen) { ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL); @@ -1067,11 +1121,12 @@ static int nvme_submit_user_cmd(struct request_queue *q, void *meta = NULL; int ret; - req = nvme_alloc_request(q, cmd, 0, NVME_QID_ANY); + req = nvme_alloc_request(q, cmd, 0); if (IS_ERR(req)) return PTR_ERR(req); - req->timeout = timeout ? timeout : ADMIN_TIMEOUT; + if (timeout) + req->timeout = timeout; nvme_req(req)->flags |= NVME_REQ_USERCMD; if (ubuffer && bufflen) { @@ -1141,8 +1196,8 @@ static int nvme_keep_alive(struct nvme_ctrl *ctrl) { struct request *rq; - rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, BLK_MQ_REQ_RESERVED, - NVME_QID_ANY); + rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd, + BLK_MQ_REQ_RESERVED); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -1302,7 +1357,8 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, NVME_IDENTIFY_DATA_SIZE); if (status) { dev_warn(ctrl->device, - "Identify Descriptors failed (%d)\n", status); + "Identify Descriptors failed (nsid=%u, status=0x%x)\n", + nsid, status); goto free_data; } @@ -2053,12 +2109,13 @@ static void nvme_update_disk_info(struct gendisk *disk, capacity = 0; } - set_capacity_revalidate_and_notify(disk, capacity, false); + set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); - if (id->nsattr & NVME_NS_ATTR_RO) + if ((id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)) set_disk_ro(disk, true); } @@ -2134,7 +2191,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); blk_queue_update_readahead(ns->head->disk->queue); - nvme_update_bdev_size(ns->head->disk); blk_mq_unfreeze_queue(ns->head->disk->queue); } #endif @@ -2261,13 +2317,13 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, cmd.common.cdw10 = cpu_to_le32(((u32)secp) << 24 | ((u32)spsp) << 8); cmd.common.cdw11 = cpu_to_le32(len); - return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, - ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false); + return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, 0, + NVME_QID_ANY, 1, 0, false); } EXPORT_SYMBOL_GPL(nvme_sec_submit); #endif /* CONFIG_BLK_SED_OPAL */ -static const struct block_device_operations nvme_fops = { +static const struct block_device_operations nvme_bdev_ops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, .compat_ioctl = nvme_compat_ioctl, @@ -3274,7 +3330,7 @@ static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); - if (disk->fops == &nvme_fops) + if (disk->fops == &nvme_bdev_ops) return nvme_get_ns_from_dev(dev)->head; else return disk->private_data; @@ -3383,7 +3439,7 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, } #ifdef CONFIG_NVME_MULTIPATH if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) { - if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */ + if (dev_to_disk(dev)->fops != &nvme_bdev_ops) /* per-path attr */ return 0; if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; @@ -3804,7 +3860,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, struct gendisk *disk; struct nvme_id_ns *id; char disk_name[DISK_NAME_LEN]; - int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT, ret; + int node = ctrl->numa_node, flags = GENHD_FL_EXT_DEVT; if (nvme_identify_ns(ctrl, nsid, ids, &id)) return; @@ -3828,8 +3884,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, ns->ctrl = ctrl; kref_init(&ns->kref); - ret = nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED); - if (ret) + if (nvme_init_ns_head(ns, nsid, ids, id->nmic & NVME_NS_NMIC_SHARED)) goto out_free_queue; nvme_set_disk_name(disk_name, ns, ctrl, &flags); @@ -3837,7 +3892,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, if (!disk) goto out_unlink_ns; - disk->fops = &nvme_fops; + disk->fops = &nvme_bdev_ops; disk->private_data = ns; disk->queue = ns->queue; disk->flags = flags; @@ -3848,8 +3903,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_put_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { - ret = nvme_nvm_register(ns, disk_name, node); - if (ret) { + if (nvme_nvm_register(ns, disk_name, node)) { dev_warn(ctrl->device, "LightNVM init failure\n"); goto out_put_disk; } @@ -3962,8 +4016,6 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) */ if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) nvme_ns_remove(ns); - else - revalidate_disk_size(ns->disk, true); } static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) @@ -4042,8 +4094,11 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) ret = nvme_submit_sync_cmd(ctrl->admin_q, &cmd, ns_list, NVME_IDENTIFY_DATA_SIZE); - if (ret) + if (ret) { + dev_warn(ctrl->device, + "Identify NS List failed (status=0x%x)\n", ret); goto free; + } for (i = 0; i < nr_entries; i++) { u32 nsid = le32_to_cpu(ns_list[i]); @@ -4346,6 +4401,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_stop_keep_alive(ctrl); + nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); } @@ -4423,6 +4479,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, int ret; ctrl->state = NVME_CTRL_NEW; + clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags); spin_lock_init(&ctrl->lock); mutex_init(&ctrl->scan_lock); INIT_LIST_HEAD(&ctrl->namespaces); @@ -4439,6 +4496,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, init_waitqueue_head(&ctrl->state_wq); INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work); + INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work); memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd)); ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive; @@ -4457,7 +4515,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, device_initialize(&ctrl->ctrl_device); ctrl->device = &ctrl->ctrl_device; - ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance); + ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt), + ctrl->instance); ctrl->device->class = nvme_class; ctrl->device->parent = ctrl->dev; ctrl->device->groups = nvme_dev_attr_groups; @@ -4666,7 +4725,8 @@ static int __init nvme_core_init(void) if (!nvme_delete_wq) goto destroy_reset_wq; - result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); + result = alloc_chrdev_region(&nvme_ctrl_base_chr_devt, 0, + NVME_MINORS, "nvme"); if (result < 0) goto destroy_delete_wq; @@ -4687,7 +4747,7 @@ static int __init nvme_core_init(void) destroy_class: class_destroy(nvme_class); unregister_chrdev: - unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); + unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_delete_wq: destroy_workqueue(nvme_delete_wq); destroy_reset_wq: @@ -4702,7 +4762,7 @@ static void __exit nvme_core_exit(void) { class_destroy(nvme_subsys_class); class_destroy(nvme_class); - unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); + unregister_chrdev_region(nvme_ctrl_base_chr_devt, NVME_MINORS); destroy_workqueue(nvme_delete_wq); destroy_workqueue(nvme_reset_wq); destroy_workqueue(nvme_wq); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 8575724734e0..72ac00173500 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -549,6 +549,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl, { if (ctrl->state != NVME_CTRL_DELETING_NOIO && ctrl->state != NVME_CTRL_DEAD && + !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) && !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE; @@ -615,6 +616,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" }, { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, { NVMF_OPT_TOS, "tos=%d" }, + { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" }, { NVMF_OPT_ERR, NULL } }; @@ -634,6 +636,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; opts->kato = NVME_DEFAULT_KATO; opts->duplicate_connect = false; + opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO; opts->hdr_digest = false; opts->data_digest = false; opts->tos = -1; /* < 0 == use transport default */ @@ -754,6 +757,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n"); ctrl_loss_tmo = token; break; + case NVMF_OPT_FAIL_FAST_TMO: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + + if (token >= 0) + pr_warn("I/O fail on reconnect controller after %d sec\n", + token); + opts->fast_io_fail_tmo = token; + break; case NVMF_OPT_HOSTNQN: if (opts->host) { pr_err("hostnqn already user-assigned: %s\n", @@ -884,11 +898,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->nr_poll_queues = 0; opts->duplicate_connect = true; } - if (ctrl_loss_tmo < 0) + if (ctrl_loss_tmo < 0) { opts->max_reconnects = -1; - else + } else { opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, opts->reconnect_delay); + if (ctrl_loss_tmo < opts->fast_io_fail_tmo) + pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", + opts->fast_io_fail_tmo, ctrl_loss_tmo); + } if (!opts->host) { kref_get(&nvmf_default_host->ref); @@ -988,7 +1006,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\ - NVMF_OPT_DISABLE_SQFLOW) + NVMF_OPT_DISABLE_SQFLOW |\ + NVMF_OPT_FAIL_FAST_TMO) static struct nvme_ctrl * nvmf_create_ctrl(struct device *dev, const char *buf) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a9c1e3b4585e..733010d2eafd 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -15,6 +15,8 @@ #define NVMF_DEF_RECONNECT_DELAY 10 /* default to 600 seconds of reconnect attempts before giving up */ #define NVMF_DEF_CTRL_LOSS_TMO 600 +/* default is -1: the fail fast mechanism is disabled */ +#define NVMF_DEF_FAIL_FAST_TMO -1 /* * Define a host as seen by the target. We allocate one at boot, but also @@ -56,6 +58,7 @@ enum { NVMF_OPT_NR_WRITE_QUEUES = 1 << 17, NVMF_OPT_NR_POLL_QUEUES = 1 << 18, NVMF_OPT_TOS = 1 << 19, + NVMF_OPT_FAIL_FAST_TMO = 1 << 20, }; /** @@ -89,6 +92,7 @@ enum { * @nr_write_queues: number of queues for write I/O * @nr_poll_queues: number of queues for polling I/O * @tos: type of service + * @fast_io_fail_tmo: Fast I/O fail timeout in seconds */ struct nvmf_ctrl_options { unsigned mask; @@ -111,6 +115,7 @@ struct nvmf_ctrl_options { unsigned int nr_write_queues; unsigned int nr_poll_queues; int tos; + int fast_io_fail_tmo; }; /* diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index f4c246462658..38373a0e86ef 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3479,7 +3479,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->lport->ops->fcprqst_priv_sz); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index 8e562d0f2c30..470cef3abec3 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -653,7 +653,7 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q, nvme_nvm_rqtocmd(rqd, ns, cmd); - rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY); + rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0); if (IS_ERR(rq)) return rq; @@ -767,14 +767,14 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q, DECLARE_COMPLETION_ONSTACK(wait); int ret = 0; - rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0, - NVME_QID_ANY); + rq = nvme_alloc_request(q, (struct nvme_command *)vcmd, 0); if (IS_ERR(rq)) { ret = -ENOMEM; goto err_cmd; } - rq->timeout = timeout ? timeout : ADMIN_TIMEOUT; + if (timeout) + rq->timeout = timeout; if (ppa_buf && ppa_len) { ppa_list = dma_pool_alloc(dev->dma_pool, GFP_KERNEL, &ppa_dma); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 74896be40c17..9ac762b28811 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -279,6 +279,8 @@ static bool nvme_available_path(struct nvme_ns_head *head) struct nvme_ns *ns; list_for_each_entry_rcu(ns, &head->list, siblings) { + if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags)) + continue; switch (ns->ctrl->state) { case NVME_CTRL_LIVE: case NVME_CTRL_RESETTING: @@ -312,8 +314,7 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) if (likely(ns)) { bio->bi_disk = ns->disk; bio->bi_opf |= REQ_NVME_MPATH; - trace_block_bio_remap(bio->bi_disk->queue, bio, - disk_devt(ns->head->disk), + trace_block_bio_remap(bio, disk_devt(ns->head->disk), bio->bi_iter.bi_sector); ret = submit_bio_noacct(bio); } else if (nvme_available_path(head)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 567f7ad18a91..7e49f61f81df 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -24,7 +24,7 @@ extern unsigned int nvme_io_timeout; #define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) extern unsigned int admin_timeout; -#define ADMIN_TIMEOUT (admin_timeout * HZ) +#define NVME_ADMIN_TIMEOUT (admin_timeout * HZ) #define NVME_DEFAULT_KATO 5 #define NVME_KATO_GRACE 10 @@ -178,7 +178,8 @@ static inline u16 nvme_req_qid(struct request *req) { if (!req->q->queuedata) return 0; - return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1; + + return req->mq_hctx->queue_num + 1; } /* The below value is the specific amount of delay needed before checking @@ -298,6 +299,7 @@ struct nvme_ctrl { struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; + struct delayed_work failfast_work; struct nvme_command ka_cmd; struct work_struct fw_act_work; unsigned long events; @@ -331,6 +333,8 @@ struct nvme_ctrl { u16 icdoff; u16 maxcmd; int nr_reconnects; + unsigned long flags; +#define NVME_CTRL_FAILFAST_EXPIRED 0 struct nvmf_ctrl_options *opts; struct page *discard_page; @@ -442,6 +446,7 @@ struct nvme_ns { #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 #define NVME_NS_ANA_PENDING 2 +#define NVME_NS_FORCE_RO 3 struct nvme_fault_inject fault_inject; @@ -604,6 +609,8 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); #define NVME_QID_ANY -1 struct request *nvme_alloc_request(struct request_queue *q, + struct nvme_command *cmd, blk_mq_req_flags_t flags); +struct request *nvme_alloc_request_qid(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid); void nvme_cleanup_cmd(struct request *req); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3be352403839..b4385cb0ff60 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1319,13 +1319,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) req->tag, nvmeq->qid); abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, - BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); + BLK_MQ_REQ_NOWAIT); if (IS_ERR(abort_req)) { atomic_inc(&dev->ctrl.abort_limit); return BLK_EH_RESET_TIMER; } - abort_req->timeout = ADMIN_TIMEOUT; abort_req->end_io_data = NULL; blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio); @@ -1622,7 +1621,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.nr_hw_queues = 1; dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - dev->admin_tagset.timeout = ADMIN_TIMEOUT; + dev->admin_tagset.timeout = NVME_ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev->ctrl.numa_node; dev->admin_tagset.cmd_size = sizeof(struct nvme_iod); dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; @@ -2104,6 +2103,12 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) static unsigned int nvme_max_io_queues(struct nvme_dev *dev) { + /* + * If tags are shared with admin queue (Apple bug), then + * make sure we only use one IO queue. + */ + if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) + return 1; return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues; } @@ -2122,16 +2127,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) dev->nr_write_queues = write_queues; dev->nr_poll_queues = poll_queues; - /* - * If tags are shared with admin queue (Apple bug), then - * make sure we only use one IO queue. - */ - if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) - nr_io_queues = 1; - else - nr_io_queues = min(nvme_max_io_queues(dev), - dev->nr_allocated_queues - 1); - + nr_io_queues = dev->nr_allocated_queues - 1; result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; @@ -2234,11 +2230,10 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) cmd.delete_queue.opcode = opcode; cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid); - req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY); + req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT); if (IS_ERR(req)) return PTR_ERR(req); - req->timeout = ADMIN_TIMEOUT; req->end_io_data = nvmeq; init_completion(&nvmeq->delete_done); @@ -2254,7 +2249,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) unsigned long timeout; retry: - timeout = ADMIN_TIMEOUT; + timeout = NVME_ADMIN_TIMEOUT; while (nr_queues > 0) { if (nvme_delete_queue(&dev->queues[nr_queues], opcode)) break; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 65e3d0ef36e1..df9f6f4549f1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -797,7 +797,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, NVME_RDMA_DATA_SGL_SIZE; set->driver_data = ctrl; set->nr_hw_queues = 1; - set->timeout = ADMIN_TIMEOUT; + set->timeout = NVME_ADMIN_TIMEOUT; set->flags = BLK_MQ_F_NO_SCHED; } else { set = &ctrl->tag_set; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c0c33320fe65..1ba659927442 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1568,7 +1568,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; set->nr_hw_queues = 1; - set->timeout = ADMIN_TIMEOUT; + set->timeout = NVME_ADMIN_TIMEOUT; } else { set = &ctrl->tag_set; memset(set, 0, sizeof(*set)); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 67e87e9f306f..1dfe9a3500e3 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -55,12 +55,17 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) int status; /* Driver requires zone append support */ - if (!(le32_to_cpu(log->iocs[nvme_cmd_zone_append]) & + if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) & NVME_CMD_EFFECTS_CSUPP)) { + if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags)) + dev_warn(ns->ctrl->device, + "Zone Append supported for zoned namespace:%d. Remove read-only mode\n", + ns->head->ns_id); + } else { + set_bit(NVME_NS_FORCE_RO, &ns->flags); dev_warn(ns->ctrl->device, - "append not supported for zoned namespace:%d\n", - ns->head->ns_id); - return -EINVAL; + "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n", + ns->head->ns_id); } /* Lazily query controller append limit for the first zoned namespace */ diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 8056955e652c..4be2ececbc45 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -24,7 +24,7 @@ config NVME_TARGET_PASSTHRU This enables target side NVMe passthru controller support for the NVMe Over Fabrics protocol. It allows for hosts to manage and directly access an actual NVMe controller residing on the target - side, incuding executing Vendor Unique Commands. + side, including executing Vendor Unique Commands. If unsure, say N. diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dca34489a1dc..8d90235e4fcc 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -89,12 +89,12 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req, if (!ns->bdev) goto out; - host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); - data_units_read = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, - sectors[READ]), 1000); - host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); - data_units_written = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, - sectors[WRITE]), 1000); + host_reads = part_stat_read(ns->bdev, ios[READ]); + data_units_read = + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[READ]), 1000); + host_writes = part_stat_read(ns->bdev, ios[WRITE]); + data_units_written = + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[WRITE]), 1000); put_unaligned_le64(host_reads, &slog->host_reads[0]); put_unaligned_le64(data_units_read, &slog->data_units_read[0]); @@ -120,12 +120,12 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req, /* we don't have the right data for file backed ns */ if (!ns->bdev) continue; - host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); + host_reads += part_stat_read(ns->bdev, ios[READ]); data_units_read += DIV_ROUND_UP( - part_stat_read(ns->bdev->bd_part, sectors[READ]), 1000); - host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]); + part_stat_read(ns->bdev, sectors[READ]), 1000); + host_writes += part_stat_read(ns->bdev, ios[WRITE]); data_units_written += DIV_ROUND_UP( - part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000); + part_stat_read(ns->bdev, sectors[WRITE]), 1000); } put_unaligned_le64(host_reads, &slog->host_reads[0]); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 37e1d7784e17..c61ffd767062 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -736,9 +736,49 @@ static ssize_t nvmet_passthru_enable_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_passthru_, enable); +static ssize_t nvmet_passthru_admin_timeout_show(struct config_item *item, + char *page) +{ + return sprintf(page, "%u\n", to_subsys(item->ci_parent)->admin_timeout); +} + +static ssize_t nvmet_passthru_admin_timeout_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item->ci_parent); + unsigned int timeout; + + if (kstrtouint(page, 0, &timeout)) + return -EINVAL; + subsys->admin_timeout = timeout; + return count; +} +CONFIGFS_ATTR(nvmet_passthru_, admin_timeout); + +static ssize_t nvmet_passthru_io_timeout_show(struct config_item *item, + char *page) +{ + return sprintf(page, "%u\n", to_subsys(item->ci_parent)->io_timeout); +} + +static ssize_t nvmet_passthru_io_timeout_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item->ci_parent); + unsigned int timeout; + + if (kstrtouint(page, 0, &timeout)) + return -EINVAL; + subsys->io_timeout = timeout; + return count; +} +CONFIGFS_ATTR(nvmet_passthru_, io_timeout); + static struct configfs_attribute *nvmet_passthru_attrs[] = { &nvmet_passthru_attr_device_path, &nvmet_passthru_attr_enable, + &nvmet_passthru_attr_admin_timeout, + &nvmet_passthru_attr_io_timeout, NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 957b39a82431..8ce4d59cc9e7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -757,8 +757,6 @@ void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, { cq->qid = qid; cq->size = size; - - ctrl->cqs[qid] = cq; } void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, @@ -1344,20 +1342,14 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!ctrl->changed_ns_list) goto out_free_ctrl; - ctrl->cqs = kcalloc(subsys->max_qid + 1, - sizeof(struct nvmet_cq *), - GFP_KERNEL); - if (!ctrl->cqs) - goto out_free_changed_ns_list; - ctrl->sqs = kcalloc(subsys->max_qid + 1, sizeof(struct nvmet_sq *), GFP_KERNEL); if (!ctrl->sqs) - goto out_free_cqs; + goto out_free_changed_ns_list; if (subsys->cntlid_min > subsys->cntlid_max) - goto out_free_cqs; + goto out_free_changed_ns_list; ret = ida_simple_get(&cntlid_ida, subsys->cntlid_min, subsys->cntlid_max, @@ -1395,8 +1387,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, out_free_sqs: kfree(ctrl->sqs); -out_free_cqs: - kfree(ctrl->cqs); out_free_changed_ns_list: kfree(ctrl->changed_ns_list); out_free_ctrl: @@ -1426,7 +1416,6 @@ static void nvmet_ctrl_free(struct kref *ref) nvmet_async_events_free(ctrl); kfree(ctrl->sqs); - kfree(ctrl->cqs); kfree(ctrl->changed_ns_list); kfree(ctrl); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index f40c05c33c3a..682854e0e079 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -69,6 +69,7 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, struct nvmet_port *port; struct nvmet_subsys_link *s; + lockdep_assert_held(&nvmet_config_sem); nvmet_genctr++; list_for_each_entry(port, nvmet_ports, global_entry) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 3da067a8311e..733d9363900e 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -564,6 +564,50 @@ fcloop_call_host_done(struct nvmefc_fcp_req *fcpreq, fcloop_tfcp_req_put(tfcp_req); } +static bool drop_fabric_opcode; +#define DROP_OPCODE_MASK 0x00FF +/* fabrics opcode will have a bit set above 1st byte */ +static int drop_opcode = -1; +static int drop_instance; +static int drop_amount; +static int drop_current_cnt; + +/* + * Routine to parse io and determine if the io is to be dropped. + * Returns: + * 0 if io is not obstructed + * 1 if io was dropped + */ +static int check_for_drop(struct fcloop_fcpreq *tfcp_req) +{ + struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvme_fc_cmd_iu *cmdiu = fcpreq->cmdaddr; + struct nvme_command *sqe = &cmdiu->sqe; + + if (drop_opcode == -1) + return 0; + + pr_info("%s: seq opcd x%02x fctype x%02x: drop F %s op x%02x " + "inst %d start %d amt %d\n", + __func__, sqe->common.opcode, sqe->fabrics.fctype, + drop_fabric_opcode ? "y" : "n", + drop_opcode, drop_current_cnt, drop_instance, drop_amount); + + if ((drop_fabric_opcode && + (sqe->common.opcode != nvme_fabrics_command || + sqe->fabrics.fctype != drop_opcode)) || + (!drop_fabric_opcode && sqe->common.opcode != drop_opcode)) + return 0; + + if (++drop_current_cnt >= drop_instance) { + if (drop_current_cnt >= drop_instance + drop_amount) + drop_opcode = -1; + return 1; + } + + return 0; +} + static void fcloop_fcp_recv_work(struct work_struct *work) { @@ -590,10 +634,14 @@ fcloop_fcp_recv_work(struct work_struct *work) if (unlikely(aborted)) ret = -ECANCELED; - else - ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, + else { + if (likely(!check_for_drop(tfcp_req))) + ret = nvmet_fc_rcv_fcp_req(tfcp_req->tport->targetport, &tfcp_req->tgt_fcp_req, fcpreq->cmdaddr, fcpreq->cmdlen); + else + pr_info("%s: dropped command ********\n", __func__); + } if (ret) fcloop_call_host_done(fcpreq, tfcp_req, ret); @@ -1449,6 +1497,33 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr, return ret ? ret : count; } +static ssize_t +fcloop_set_cmd_drop(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int opcode, starting, amount; + + if (sscanf(buf, "%x:%d:%d", &opcode, &starting, &amount) != 3) + return -EBADRQC; + + drop_current_cnt = 0; + drop_fabric_opcode = (opcode & ~DROP_OPCODE_MASK) ? true : false; + drop_opcode = (opcode & DROP_OPCODE_MASK); + drop_instance = starting; + /* the check to drop routine uses instance + count to know when + * to end. Thus, if dropping 1 instance, count should be 0. + * so subtract 1 from the count. + */ + drop_amount = amount - 1; + + pr_info("%s: DROP: Starting at instance %d of%s opcode x%x drop +%d " + "instances\n", + __func__, drop_instance, drop_fabric_opcode ? " fabric" : "", + drop_opcode, drop_amount); + + return count; +} + static DEVICE_ATTR(add_local_port, 0200, NULL, fcloop_create_local_port); static DEVICE_ATTR(del_local_port, 0200, NULL, fcloop_delete_local_port); @@ -1456,6 +1531,7 @@ static DEVICE_ATTR(add_remote_port, 0200, NULL, fcloop_create_remote_port); static DEVICE_ATTR(del_remote_port, 0200, NULL, fcloop_delete_remote_port); static DEVICE_ATTR(add_target_port, 0200, NULL, fcloop_create_target_port); static DEVICE_ATTR(del_target_port, 0200, NULL, fcloop_delete_target_port); +static DEVICE_ATTR(set_cmd_drop, 0200, NULL, fcloop_set_cmd_drop); static struct attribute *fcloop_dev_attrs[] = { &dev_attr_add_local_port.attr, @@ -1464,6 +1540,7 @@ static struct attribute *fcloop_dev_attrs[] = { &dev_attr_del_remote_port.attr, &dev_attr_add_target_port.attr, &dev_attr_del_target_port.attr, + &dev_attr_set_cmd_drop.attr, NULL }; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index f6d81239be21..cb6f86572b24 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -211,6 +211,8 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, (set == &ctrl->tag_set) ? hctx_idx + 1 : 0); } +static struct lock_class_key loop_hctx_fq_lock_key; + static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -219,6 +221,14 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); + /* + * flush_end_io() can be called recursively for us, so use our own + * lock class key for avoiding lockdep possible recursive locking, + * then we can remove the dynamically allocated lock class for each + * flush queue, that way may cause horrible boot delay. + */ + blk_mq_hctx_set_fq_lock_class(hctx, &loop_hctx_fq_lock_key); + hctx->driver_data = queue; return 0; } @@ -345,7 +355,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) NVME_INLINE_SG_CNT * sizeof(struct scatterlist); ctrl->admin_tag_set.driver_data = ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; ctrl->queues[0].ctrl = ctrl; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 559a15ccc322..592763732065 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -164,7 +164,6 @@ static inline struct nvmet_port *ana_groups_to_port( struct nvmet_ctrl { struct nvmet_subsys *subsys; - struct nvmet_cq **cqs; struct nvmet_sq **sqs; bool cmd_seen; @@ -249,6 +248,8 @@ struct nvmet_subsys { struct nvme_ctrl *passthru_ctrl; char *passthru_ctrl_path; struct config_group passthru_group; + unsigned int admin_timeout; + unsigned int io_timeout; #endif /* CONFIG_NVME_TARGET_PASSTHRU */ }; @@ -330,6 +331,7 @@ struct nvmet_req { struct work_struct work; } f; struct { + struct bio inline_bio; struct request *rq; struct work_struct work; bool use_workqueue; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 8ee94f056898..b9776fc8f08f 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -188,35 +188,31 @@ static void nvmet_passthru_req_done(struct request *rq, static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) { struct scatterlist *sg; - int op_flags = 0; struct bio *bio; - int i, ret; + int i; if (req->sg_cnt > BIO_MAX_PAGES) return -EINVAL; - if (req->cmd->common.opcode == nvme_cmd_flush) - op_flags = REQ_FUA; - else if (nvme_is_write(req->cmd)) - op_flags = REQ_SYNC | REQ_IDLE; - - bio = bio_alloc(GFP_KERNEL, req->sg_cnt); - bio->bi_end_io = bio_put; - bio->bi_opf = req_op(rq) | op_flags; + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { + bio = &req->p.inline_bio; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); + } else { + bio = bio_alloc(GFP_KERNEL, min(req->sg_cnt, BIO_MAX_PAGES)); + bio->bi_end_io = bio_put; + } + bio->bi_opf = req_op(rq); for_each_sg(req->sg, sg, req->sg_cnt, i) { if (bio_add_pc_page(rq->q, bio, sg_page(sg), sg->length, sg->offset) < sg->length) { - bio_put(bio); + if (bio != &req->p.inline_bio) + bio_put(bio); return -EINVAL; } } - ret = blk_rq_append_bio(rq, &bio); - if (unlikely(ret)) { - bio_put(bio); - return ret; - } + blk_rq_bio_prep(rq, bio, req->sg_cnt); return 0; } @@ -227,6 +223,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) struct request_queue *q = ctrl->admin_q; struct nvme_ns *ns = NULL; struct request *rq = NULL; + unsigned int timeout; u32 effects; u16 status; int ret; @@ -242,14 +239,20 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) } q = ns->queue; + timeout = req->sq->ctrl->subsys->io_timeout; + } else { + timeout = req->sq->ctrl->subsys->admin_timeout; } - rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY); + rq = nvme_alloc_request(q, req->cmd, 0); if (IS_ERR(rq)) { status = NVME_SC_INTERNAL; goto out_put_ns; } + if (timeout) + rq->timeout = timeout; + if (req->sg_cnt) { ret = nvmet_passthru_map_sg(req, rq); if (unlikely(ret)) { diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index 07d162b179fc..dceec715e745 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -35,6 +35,13 @@ config RESET_AXS10X help This enables the reset controller driver for AXS10x. +config RESET_BCM6345 + bool "BCM6345 Reset Controller" + depends on BMIPS_GENERIC || COMPILE_TEST + default BMIPS_GENERIC + help + This enables the reset controller driver for BCM6345 SoCs. + config RESET_BERLIN bool "Berlin Reset Driver" if COMPILE_TEST default ARCH_BERLIN diff --git a/drivers/reset/Makefile b/drivers/reset/Makefile index 16947610cc3b..1054123fd187 100644 --- a/drivers/reset/Makefile +++ b/drivers/reset/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra/ obj-$(CONFIG_RESET_A10SR) += reset-a10sr.o obj-$(CONFIG_RESET_ATH79) += reset-ath79.o obj-$(CONFIG_RESET_AXS10X) += reset-axs10x.o +obj-$(CONFIG_RESET_BCM6345) += reset-bcm6345.o obj-$(CONFIG_RESET_BERLIN) += reset-berlin.o obj-$(CONFIG_RESET_BRCMSTB) += reset-brcmstb.o obj-$(CONFIG_RESET_BRCMSTB_RESCAL) += reset-brcmstb-rescal.o diff --git a/drivers/reset/reset-bcm6345.c b/drivers/reset/reset-bcm6345.c new file mode 100644 index 000000000000..737e4e81f6b7 --- /dev/null +++ b/drivers/reset/reset-bcm6345.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BCM6345 Reset Controller Driver + * + * Copyright (C) 2020 Álvaro Fernández Rojas + */ + +#include +#include +#include +#include +#include +#include + +#define BCM6345_RESET_NUM 32 +#define BCM6345_RESET_SLEEP_MIN_US 10000 +#define BCM6345_RESET_SLEEP_MAX_US 20000 + +struct bcm6345_reset { + struct reset_controller_dev rcdev; + void __iomem *base; + spinlock_t lock; +}; + +static inline struct bcm6345_reset * +to_bcm6345_reset(struct reset_controller_dev *rcdev) +{ + return container_of(rcdev, struct bcm6345_reset, rcdev); +} + +static int bcm6345_reset_update(struct reset_controller_dev *rcdev, + unsigned long id, bool assert) +{ + struct bcm6345_reset *bcm6345_reset = to_bcm6345_reset(rcdev); + unsigned long flags; + uint32_t val; + + spin_lock_irqsave(&bcm6345_reset->lock, flags); + val = __raw_readl(bcm6345_reset->base); + if (assert) + val &= ~BIT(id); + else + val |= BIT(id); + __raw_writel(val, bcm6345_reset->base); + spin_unlock_irqrestore(&bcm6345_reset->lock, flags); + + return 0; +} + +static int bcm6345_reset_assert(struct reset_controller_dev *rcdev, + unsigned long id) +{ + return bcm6345_reset_update(rcdev, id, true); +} + +static int bcm6345_reset_deassert(struct reset_controller_dev *rcdev, + unsigned long id) +{ + return bcm6345_reset_update(rcdev, id, false); +} + +static int bcm6345_reset_reset(struct reset_controller_dev *rcdev, + unsigned long id) +{ + bcm6345_reset_update(rcdev, id, true); + usleep_range(BCM6345_RESET_SLEEP_MIN_US, + BCM6345_RESET_SLEEP_MAX_US); + + bcm6345_reset_update(rcdev, id, false); + /* + * Ensure component is taken out reset state by sleeping also after + * deasserting the reset. Otherwise, the component may not be ready + * for operation. + */ + usleep_range(BCM6345_RESET_SLEEP_MIN_US, + BCM6345_RESET_SLEEP_MAX_US); + + return 0; +} + +static int bcm6345_reset_status(struct reset_controller_dev *rcdev, + unsigned long id) +{ + struct bcm6345_reset *bcm6345_reset = to_bcm6345_reset(rcdev); + + return !(__raw_readl(bcm6345_reset->base) & BIT(id)); +} + +static struct reset_control_ops bcm6345_reset_ops = { + .assert = bcm6345_reset_assert, + .deassert = bcm6345_reset_deassert, + .reset = bcm6345_reset_reset, + .status = bcm6345_reset_status, +}; + +static int bcm6345_reset_probe(struct platform_device *pdev) +{ + struct bcm6345_reset *bcm6345_reset; + + bcm6345_reset = devm_kzalloc(&pdev->dev, + sizeof(*bcm6345_reset), GFP_KERNEL); + if (!bcm6345_reset) + return -ENOMEM; + + platform_set_drvdata(pdev, bcm6345_reset); + + bcm6345_reset->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(bcm6345_reset->base)) + return PTR_ERR(bcm6345_reset->base); + + spin_lock_init(&bcm6345_reset->lock); + bcm6345_reset->rcdev.ops = &bcm6345_reset_ops; + bcm6345_reset->rcdev.owner = THIS_MODULE; + bcm6345_reset->rcdev.of_node = pdev->dev.of_node; + bcm6345_reset->rcdev.of_reset_n_cells = 1; + bcm6345_reset->rcdev.nr_resets = BCM6345_RESET_NUM; + + return devm_reset_controller_register(&pdev->dev, + &bcm6345_reset->rcdev); +} + +static const struct of_device_id bcm6345_reset_of_match[] = { + { .compatible = "brcm,bcm6345-reset" }, + { /* sentinel */ }, +}; + +static struct platform_driver bcm6345_reset_driver = { + .probe = bcm6345_reset_probe, + .driver = { + .name = "bcm6345-reset", + .of_match_table = bcm6345_reset_of_match, + .suppress_bind_attrs = true, + }, +}; +builtin_platform_driver(bcm6345_reset_driver); diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index fd568248fd26..c7eb9a10c680 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -430,7 +430,7 @@ dasd_state_ready_to_online(struct dasd_device * device) { struct gendisk *disk; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; device->state = DASD_STATE_ONLINE; if (device->block) { @@ -443,7 +443,7 @@ dasd_state_ready_to_online(struct dasd_device * device) disk = device->block->bdev->bd_disk; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); disk_part_iter_exit(&piter); } return 0; @@ -457,7 +457,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) int rc; struct gendisk *disk; struct disk_part_iter piter; - struct hd_struct *part; + struct block_device *part; if (device->discipline->online_to_ready) { rc = device->discipline->online_to_ready(device); @@ -470,7 +470,7 @@ static int dasd_state_online_to_ready(struct dasd_device *device) disk = device->block->bdev->bd_disk; disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); while ((part = disk_part_iter_next(&piter))) - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); disk_part_iter_exit(&piter); } return 0; @@ -2084,19 +2084,24 @@ static void __dasd_device_start_head(struct dasd_device *device) static void __dasd_device_check_path_events(struct dasd_device *device) { + __u8 tbvpm, fcsecpm; int rc; - if (!dasd_path_get_tbvpm(device)) + tbvpm = dasd_path_get_tbvpm(device); + fcsecpm = dasd_path_get_fcsecpm(device); + + if (!tbvpm && !fcsecpm) return; if (device->stopped & ~(DASD_STOPPED_DC_WAIT)) return; - rc = device->discipline->verify_path(device, - dasd_path_get_tbvpm(device)); - if (rc) + rc = device->discipline->pe_handler(device, tbvpm, fcsecpm); + if (rc) { dasd_device_set_timer(device, 50); - else + } else { dasd_path_clear_all_verify(device); + dasd_path_clear_all_fcsec(device); + } }; /* @@ -3376,6 +3381,7 @@ dasd_device_operations = { .ioctl = dasd_ioctl, .compat_ioctl = dasd_ioctl, .getgeo = dasd_getgeo, + .set_read_only = dasd_set_read_only, }; /******************************************************************************* @@ -3447,8 +3453,7 @@ static void dasd_generic_auto_online(void *data, async_cookie_t cookie) * Initial attempt at a probe function. this can be simplified once * the other detection code is gone. */ -int dasd_generic_probe(struct ccw_device *cdev, - struct dasd_discipline *discipline) +int dasd_generic_probe(struct ccw_device *cdev) { int ret; @@ -3846,6 +3851,10 @@ void dasd_generic_path_event(struct ccw_device *cdev, int *path_event) if (device->discipline->kick_validate) device->discipline->kick_validate(device); } + if (path_event[chp] & PE_PATH_FCES_EVENT) { + dasd_path_fcsec_update(device, chp); + dasd_schedule_device_bh(device); + } } hpfpm = dasd_path_get_hpfpm(device); ifccpm = dasd_path_get_ifccpm(device); diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 32fc51341d99..16bb135c20aa 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -576,6 +576,11 @@ dasd_create_device(struct ccw_device *cdev) dev_set_drvdata(&cdev->dev, device); spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + device->paths_info = kset_create_and_add("paths_info", NULL, + &device->cdev->dev.kobj); + if (!device->paths_info) + dev_warn(&cdev->dev, "Could not create paths_info kset\n"); + return device; } @@ -622,6 +627,9 @@ dasd_delete_device(struct dasd_device *device) wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0); dasd_generic_free_discipline(device); + + kset_unregister(device->paths_info); + /* Disconnect dasd_device structure from ccw_device structure. */ cdev = device->cdev; device->cdev = NULL; @@ -1641,6 +1649,39 @@ dasd_path_interval_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(path_interval, 0644, dasd_path_interval_show, dasd_path_interval_store); +static ssize_t +dasd_device_fcs_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dasd_device *device; + int fc_sec; + int rc; + + device = dasd_device_from_cdev(to_ccwdev(dev)); + if (IS_ERR(device)) + return -ENODEV; + fc_sec = dasd_path_get_fcs_device(device); + if (fc_sec == -EINVAL) + rc = snprintf(buf, PAGE_SIZE, "Inconsistent\n"); + else + rc = snprintf(buf, PAGE_SIZE, "%s\n", dasd_path_get_fcs_str(fc_sec)); + dasd_put_device(device); + + return rc; +} +static DEVICE_ATTR(fc_security, 0444, dasd_device_fcs_show, NULL); + +static ssize_t +dasd_path_fcs_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct dasd_path *path = to_dasd_path(kobj); + unsigned int fc_sec = path->fc_security; + + return snprintf(buf, PAGE_SIZE, "%s\n", dasd_path_get_fcs_str(fc_sec)); +} + +static struct kobj_attribute path_fcs_attribute = + __ATTR(fc_security, 0444, dasd_path_fcs_show, NULL); #define DASD_DEFINE_ATTR(_name, _func) \ static ssize_t dasd_##_name##_show(struct device *dev, \ @@ -1697,6 +1738,7 @@ static struct attribute * dasd_attrs[] = { &dev_attr_path_reset.attr, &dev_attr_hpf.attr, &dev_attr_ese.attr, + &dev_attr_fc_security.attr, NULL, }; @@ -1777,6 +1819,73 @@ dasd_set_feature(struct ccw_device *cdev, int feature, int flag) } EXPORT_SYMBOL(dasd_set_feature); +static struct attribute *paths_info_attrs[] = { + &path_fcs_attribute.attr, + NULL, +}; + +static struct kobj_type path_attr_type = { + .release = dasd_path_release, + .default_attrs = paths_info_attrs, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static void dasd_path_init_kobj(struct dasd_device *device, int chp) +{ + device->path[chp].kobj.kset = device->paths_info; + kobject_init(&device->path[chp].kobj, &path_attr_type); +} + +void dasd_path_create_kobj(struct dasd_device *device, int chp) +{ + int rc; + + if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) + return; + if (!device->paths_info) { + dev_warn(&device->cdev->dev, "Unable to create paths objects\n"); + return; + } + if (device->path[chp].in_sysfs) + return; + if (!device->path[chp].conf_data) + return; + + dasd_path_init_kobj(device, chp); + + rc = kobject_add(&device->path[chp].kobj, NULL, "%x.%02x", + device->path[chp].cssid, device->path[chp].chpid); + if (rc) + kobject_put(&device->path[chp].kobj); + device->path[chp].in_sysfs = true; +} +EXPORT_SYMBOL(dasd_path_create_kobj); + +void dasd_path_create_kobjects(struct dasd_device *device) +{ + u8 lpm, opm; + + opm = dasd_path_get_opm(device); + for (lpm = 0x80; lpm; lpm >>= 1) { + if (!(lpm & opm)) + continue; + dasd_path_create_kobj(device, pathmask_to_pos(lpm)); + } +} +EXPORT_SYMBOL(dasd_path_create_kobjects); + +/* + * As we keep kobjects for the lifetime of a device, this function must not be + * called anywhere but in the context of offlining a device. + */ +void dasd_path_remove_kobj(struct dasd_device *device, int chp) +{ + if (device->path[chp].in_sysfs) { + kobject_put(&device->path[chp].kobj); + device->path[chp].in_sysfs = false; + } +} +EXPORT_SYMBOL(dasd_path_remove_kobj); int dasd_add_sysfs_files(struct ccw_device *cdev) { diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 758ee4153ac1..3caa1ee5f4b0 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -103,7 +103,7 @@ struct ext_pool_exhaust_work_data { }; /* definitions for the path verification worker */ -struct path_verification_work_data { +struct pe_handler_work_data { struct work_struct worker; struct dasd_device *device; struct dasd_ccw_req cqr; @@ -111,9 +111,10 @@ struct path_verification_work_data { __u8 rcd_buffer[DASD_ECKD_RCD_DATA_SIZE]; int isglobal; __u8 tbvpm; + __u8 fcsecpm; }; -static struct path_verification_work_data *path_verification_worker; -static DEFINE_MUTEX(dasd_path_verification_mutex); +static struct pe_handler_work_data *pe_handler_worker; +static DEFINE_MUTEX(dasd_pe_handler_mutex); struct check_attention_work_data { struct work_struct worker; @@ -143,7 +144,7 @@ dasd_eckd_probe (struct ccw_device *cdev) "ccw-device options"); return ret; } - ret = dasd_generic_probe(cdev, &dasd_eckd_discipline); + ret = dasd_generic_probe(cdev); return ret; } @@ -1000,6 +1001,27 @@ static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len) return 0; } +static void dasd_eckd_store_conf_data(struct dasd_device *device, + struct dasd_conf_data *conf_data, int chp) +{ + struct channel_path_desc_fmt0 *chp_desc; + struct subchannel_id sch_id; + + ccw_device_get_schid(device->cdev, &sch_id); + /* + * path handling and read_conf allocate data + * free it before replacing the pointer + */ + kfree(device->path[chp].conf_data); + device->path[chp].conf_data = conf_data; + device->path[chp].cssid = sch_id.cssid; + device->path[chp].ssid = sch_id.ssid; + chp_desc = ccw_device_get_chp_desc(device->cdev, chp); + if (chp_desc) + device->path[chp].chpid = chp_desc->chpid; + kfree(chp_desc); +} + static void dasd_eckd_clear_conf_data(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; @@ -1013,9 +1035,33 @@ static void dasd_eckd_clear_conf_data(struct dasd_device *device) device->path[i].cssid = 0; device->path[i].ssid = 0; device->path[i].chpid = 0; + dasd_path_notoper(device, i); + dasd_path_remove_kobj(device, i); } } +static void dasd_eckd_read_fc_security(struct dasd_device *device) +{ + struct dasd_eckd_private *private = device->private; + u8 esm_valid; + u8 esm[8]; + int chp; + int rc; + + rc = chsc_scud(private->uid.ssid, (u64 *)esm, &esm_valid); + if (rc) { + for (chp = 0; chp < 8; chp++) + device->path[chp].fc_security = 0; + return; + } + + for (chp = 0; chp < 8; chp++) { + if (esm_valid & (0x80 >> chp)) + device->path[chp].fc_security = esm[chp]; + else + device->path[chp].fc_security = 0; + } +} static int dasd_eckd_read_conf(struct dasd_device *device) { @@ -1026,12 +1072,9 @@ static int dasd_eckd_read_conf(struct dasd_device *device) struct dasd_eckd_private *private, path_private; struct dasd_uid *uid; char print_path_uid[60], print_device_uid[60]; - struct channel_path_desc_fmt0 *chp_desc; - struct subchannel_id sch_id; private = device->private; opm = ccw_device_get_path_mask(device->cdev); - ccw_device_get_schid(device->cdev, &sch_id); conf_data_saved = 0; path_err = 0; /* get configuration data per operational path */ @@ -1066,15 +1109,6 @@ static int dasd_eckd_read_conf(struct dasd_device *device) kfree(conf_data); continue; } - pos = pathmask_to_pos(lpm); - /* store per path conf_data */ - device->path[pos].conf_data = conf_data; - device->path[pos].cssid = sch_id.cssid; - device->path[pos].ssid = sch_id.ssid; - chp_desc = ccw_device_get_chp_desc(device->cdev, pos); - if (chp_desc) - device->path[pos].chpid = chp_desc->chpid; - kfree(chp_desc); /* * build device UID that other path data * can be compared to it @@ -1132,18 +1166,13 @@ static int dasd_eckd_read_conf(struct dasd_device *device) dasd_path_add_cablepm(device, lpm); continue; } - pos = pathmask_to_pos(lpm); - /* store per path conf_data */ - device->path[pos].conf_data = conf_data; - device->path[pos].cssid = sch_id.cssid; - device->path[pos].ssid = sch_id.ssid; - chp_desc = ccw_device_get_chp_desc(device->cdev, pos); - if (chp_desc) - device->path[pos].chpid = chp_desc->chpid; - kfree(chp_desc); path_private.conf_data = NULL; path_private.conf_len = 0; } + + pos = pathmask_to_pos(lpm); + dasd_eckd_store_conf_data(device, conf_data, pos); + switch (dasd_eckd_path_access(conf_data, conf_len)) { case 0x02: dasd_path_add_nppm(device, lpm); @@ -1160,6 +1189,8 @@ static int dasd_eckd_read_conf(struct dasd_device *device) } } + dasd_eckd_read_fc_security(device); + return path_err; } @@ -1219,7 +1250,7 @@ static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) } static int rebuild_device_uid(struct dasd_device *device, - struct path_verification_work_data *data) + struct pe_handler_work_data *data) { struct dasd_eckd_private *private = device->private; __u8 lpm, opm = dasd_path_get_opm(device); @@ -1257,31 +1288,18 @@ static int rebuild_device_uid(struct dasd_device *device, return rc; } -static void do_path_verification_work(struct work_struct *work) +static void dasd_eckd_path_available_action(struct dasd_device *device, + struct pe_handler_work_data *data) { - struct path_verification_work_data *data; - struct dasd_device *device; struct dasd_eckd_private path_private; struct dasd_uid *uid; __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE]; __u8 lpm, opm, npm, ppm, epm, hpfpm, cablepm; + struct dasd_conf_data *conf_data; unsigned long flags; char print_uid[60]; - int rc; + int rc, pos; - data = container_of(work, struct path_verification_work_data, worker); - device = data->device; - - /* delay path verification until device was resumed */ - if (test_bit(DASD_FLAG_SUSPENDED, &device->flags)) { - schedule_work(work); - return; - } - /* check if path verification already running and delay if so */ - if (test_and_set_bit(DASD_FLAG_PATH_VERIFY, &device->flags)) { - schedule_work(work); - return; - } opm = 0; npm = 0; ppm = 0; @@ -1397,6 +1415,14 @@ static void do_path_verification_work(struct work_struct *work) } } + conf_data = kzalloc(DASD_ECKD_RCD_DATA_SIZE, GFP_KERNEL); + if (conf_data) { + memcpy(conf_data, data->rcd_buffer, + DASD_ECKD_RCD_DATA_SIZE); + } + pos = pathmask_to_pos(lpm); + dasd_eckd_store_conf_data(device, conf_data, pos); + /* * There is a small chance that a path is lost again between * above path verification and the following modification of @@ -1417,34 +1443,65 @@ static void do_path_verification_work(struct work_struct *work) dasd_path_add_cablepm(device, cablepm); dasd_path_add_nohpfpm(device, hpfpm); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); + + dasd_path_create_kobj(device, pos); } +} + +static void do_pe_handler_work(struct work_struct *work) +{ + struct pe_handler_work_data *data; + struct dasd_device *device; + + data = container_of(work, struct pe_handler_work_data, worker); + device = data->device; + + /* delay path verification until device was resumed */ + if (test_bit(DASD_FLAG_SUSPENDED, &device->flags)) { + schedule_work(work); + return; + } + /* check if path verification already running and delay if so */ + if (test_and_set_bit(DASD_FLAG_PATH_VERIFY, &device->flags)) { + schedule_work(work); + return; + } + + if (data->tbvpm) + dasd_eckd_path_available_action(device, data); + if (data->fcsecpm) + dasd_eckd_read_fc_security(device); + clear_bit(DASD_FLAG_PATH_VERIFY, &device->flags); dasd_put_device(device); if (data->isglobal) - mutex_unlock(&dasd_path_verification_mutex); + mutex_unlock(&dasd_pe_handler_mutex); else kfree(data); } -static int dasd_eckd_verify_path(struct dasd_device *device, __u8 lpm) +static int dasd_eckd_pe_handler(struct dasd_device *device, + __u8 tbvpm, __u8 fcsecpm) { - struct path_verification_work_data *data; + struct pe_handler_work_data *data; data = kmalloc(sizeof(*data), GFP_ATOMIC | GFP_DMA); if (!data) { - if (mutex_trylock(&dasd_path_verification_mutex)) { - data = path_verification_worker; + if (mutex_trylock(&dasd_pe_handler_mutex)) { + data = pe_handler_worker; data->isglobal = 1; - } else + } else { return -ENOMEM; + } } else { memset(data, 0, sizeof(*data)); data->isglobal = 0; } - INIT_WORK(&data->worker, do_path_verification_work); + INIT_WORK(&data->worker, do_pe_handler_work); dasd_get_device(device); data->device = device; - data->tbvpm = lpm; + data->tbvpm = tbvpm; + data->fcsecpm = fcsecpm; schedule_work(&data->worker); return 0; } @@ -2056,6 +2113,8 @@ dasd_eckd_check_characteristics(struct dasd_device *device) if (rc) goto out_err3; + dasd_path_create_kobjects(device); + /* Read Feature Codes */ dasd_eckd_read_features(device); @@ -6590,7 +6649,7 @@ static struct dasd_discipline dasd_eckd_discipline = { .check_device = dasd_eckd_check_characteristics, .uncheck_device = dasd_eckd_uncheck_device, .do_analysis = dasd_eckd_do_analysis, - .verify_path = dasd_eckd_verify_path, + .pe_handler = dasd_eckd_pe_handler, .basic_to_ready = dasd_eckd_basic_to_ready, .online_to_ready = dasd_eckd_online_to_ready, .basic_to_known = dasd_eckd_basic_to_known, @@ -6649,16 +6708,16 @@ dasd_eckd_init(void) GFP_KERNEL | GFP_DMA); if (!dasd_vol_info_req) return -ENOMEM; - path_verification_worker = kmalloc(sizeof(*path_verification_worker), - GFP_KERNEL | GFP_DMA); - if (!path_verification_worker) { + pe_handler_worker = kmalloc(sizeof(*pe_handler_worker), + GFP_KERNEL | GFP_DMA); + if (!pe_handler_worker) { kfree(dasd_reserve_req); kfree(dasd_vol_info_req); return -ENOMEM; } rawpadpage = (void *)__get_free_page(GFP_KERNEL); if (!rawpadpage) { - kfree(path_verification_worker); + kfree(pe_handler_worker); kfree(dasd_reserve_req); kfree(dasd_vol_info_req); return -ENOMEM; @@ -6667,7 +6726,7 @@ dasd_eckd_init(void) if (!ret) wait_for_device_probe(); else { - kfree(path_verification_worker); + kfree(pe_handler_worker); kfree(dasd_reserve_req); kfree(dasd_vol_info_req); free_page((unsigned long)rawpadpage); @@ -6679,7 +6738,7 @@ static void __exit dasd_eckd_cleanup(void) { ccw_driver_unregister(&dasd_eckd_driver); - kfree(path_verification_worker); + kfree(pe_handler_worker); kfree(dasd_reserve_req); free_page((unsigned long)rawpadpage); } diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index c027344ee225..1aeb68794ce8 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -58,7 +58,7 @@ static struct ccw_driver dasd_fba_driver; /* see below */ static int dasd_fba_probe(struct ccw_device *cdev) { - return dasd_generic_probe(cdev, &dasd_fba_discipline); + return dasd_generic_probe(cdev); } static int diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 7a34161ea5c6..3bc008f9136c 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -298,6 +298,7 @@ struct dasd_discipline { * configuration. */ int (*verify_path)(struct dasd_device *, __u8); + int (*pe_handler)(struct dasd_device *, __u8, __u8); /* * Last things to do when a device is set online, and first things @@ -418,10 +419,40 @@ extern struct dasd_discipline *dasd_diag_discipline_pointer; #define DASD_PATH_NOHPF 6 #define DASD_PATH_CUIR 7 #define DASD_PATH_IFCC 8 +#define DASD_PATH_FCSEC 9 #define DASD_THRHLD_MAX 4294967295U #define DASD_INTERVAL_MAX 4294967295U +/* FC Endpoint Security Capabilities */ +#define DASD_FC_SECURITY_UNSUP 0 +#define DASD_FC_SECURITY_AUTH 1 +#define DASD_FC_SECURITY_ENC_FCSP2 2 +#define DASD_FC_SECURITY_ENC_ERAS 3 + +#define DASD_FC_SECURITY_ENC_STR "Encryption" +static const struct { + u8 value; + char *name; +} dasd_path_fcs_mnemonics[] = { + { DASD_FC_SECURITY_UNSUP, "Unsupported" }, + { DASD_FC_SECURITY_AUTH, "Authentication" }, + { DASD_FC_SECURITY_ENC_FCSP2, DASD_FC_SECURITY_ENC_STR }, + { DASD_FC_SECURITY_ENC_ERAS, DASD_FC_SECURITY_ENC_STR }, +}; + +static inline char *dasd_path_get_fcs_str(int val) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dasd_path_fcs_mnemonics); i++) { + if (dasd_path_fcs_mnemonics[i].value == val) + return dasd_path_fcs_mnemonics[i].name; + } + + return dasd_path_fcs_mnemonics[0].name; +} + struct dasd_path { unsigned long flags; u8 cssid; @@ -430,8 +461,18 @@ struct dasd_path { struct dasd_conf_data *conf_data; atomic_t error_count; unsigned long errorclk; + u8 fc_security; + struct kobject kobj; + bool in_sysfs; }; +#define to_dasd_path(path) container_of(path, struct dasd_path, kobj) + +static inline void dasd_path_release(struct kobject *kobj) +{ +/* Memory for the dasd_path kobject is freed when dasd_free_device() is called */ +} + struct dasd_profile_info { /* legacy part of profile data, as in dasd_profile_info_t */ @@ -542,6 +583,7 @@ struct dasd_device { struct dentry *hosts_dentry; struct dasd_profile profile; struct dasd_format_entry format_entry; + struct kset *paths_info; }; struct dasd_block { @@ -766,7 +808,7 @@ void dasd_block_set_timer(struct dasd_block *, int); void dasd_block_clear_timer(struct dasd_block *); int dasd_cancel_req(struct dasd_ccw_req *); int dasd_flush_device_queue(struct dasd_device *); -int dasd_generic_probe (struct ccw_device *, struct dasd_discipline *); +int dasd_generic_probe(struct ccw_device *); void dasd_generic_free_discipline(struct dasd_device *); void dasd_generic_remove (struct ccw_device *cdev); int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *); @@ -814,6 +856,9 @@ int dasd_set_feature(struct ccw_device *, int, int); int dasd_add_sysfs_files(struct ccw_device *); void dasd_remove_sysfs_files(struct ccw_device *); +void dasd_path_create_kobj(struct dasd_device *, int); +void dasd_path_create_kobjects(struct dasd_device *); +void dasd_path_remove_kobj(struct dasd_device *, int); struct dasd_device *dasd_device_from_cdev(struct ccw_device *); struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *); @@ -834,7 +879,8 @@ int dasd_scan_partitions(struct dasd_block *); void dasd_destroy_partitions(struct dasd_block *); /* externals in dasd_ioctl.c */ -int dasd_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); +int dasd_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); +int dasd_set_read_only(struct block_device *bdev, bool ro); /* externals in dasd_proc.c */ int dasd_proc_init(void); @@ -911,6 +957,29 @@ static inline void dasd_path_clear_all_verify(struct dasd_device *device) dasd_path_clear_verify(device, chp); } +static inline void dasd_path_fcsec(struct dasd_device *device, int chp) +{ + __set_bit(DASD_PATH_FCSEC, &device->path[chp].flags); +} + +static inline void dasd_path_clear_fcsec(struct dasd_device *device, int chp) +{ + __clear_bit(DASD_PATH_FCSEC, &device->path[chp].flags); +} + +static inline int dasd_path_need_fcsec(struct dasd_device *device, int chp) +{ + return test_bit(DASD_PATH_FCSEC, &device->path[chp].flags); +} + +static inline void dasd_path_clear_all_fcsec(struct dasd_device *device) +{ + int chp; + + for (chp = 0; chp < 8; chp++) + dasd_path_clear_fcsec(device, chp); +} + static inline void dasd_path_operational(struct dasd_device *device, int chp) { __set_bit(DASD_PATH_OPERATIONAL, &device->path[chp].flags); @@ -1036,6 +1105,17 @@ static inline __u8 dasd_path_get_tbvpm(struct dasd_device *device) return tbvpm; } +static inline int dasd_path_get_fcsecpm(struct dasd_device *device) +{ + int chp; + + for (chp = 0; chp < 8; chp++) + if (dasd_path_need_fcsec(device, chp)) + return 1; + + return 0; +} + static inline __u8 dasd_path_get_nppm(struct dasd_device *device) { int chp; @@ -1103,6 +1183,31 @@ static inline __u8 dasd_path_get_hpfpm(struct dasd_device *device) return hpfpm; } +static inline u8 dasd_path_get_fcs_path(struct dasd_device *device, int chp) +{ + return device->path[chp].fc_security; +} + +static inline int dasd_path_get_fcs_device(struct dasd_device *device) +{ + u8 fc_sec = 0; + int chp; + + for (chp = 0; chp < 8; chp++) { + if (device->opm & (0x80 >> chp)) { + fc_sec = device->path[chp].fc_security; + break; + } + } + for (; chp < 8; chp++) { + if (device->opm & (0x80 >> chp)) + if (device->path[chp].fc_security != fc_sec) + return -EINVAL; + } + + return fc_sec; +} + /* * add functions for path masks * the existing path mask will be extended by the given path mask @@ -1268,6 +1373,11 @@ static inline void dasd_path_notoper(struct dasd_device *device, int chp) dasd_path_clear_nonpreferred(device, chp); } +static inline void dasd_path_fcsec_update(struct dasd_device *device, int chp) +{ + dasd_path_fcsec(device, chp); +} + /* * remove all paths from normal operation */ diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index cb6427fb9f3d..9f6424408946 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -54,8 +54,6 @@ dasd_ioctl_enable(struct block_device *bdev) return -ENODEV; dasd_enable_device(base); - /* Formatting the dasd device can change the capacity. */ - bd_set_nr_sectors(bdev, get_capacity(base->block->gdp)); dasd_put_device(base); return 0; } @@ -88,7 +86,7 @@ dasd_ioctl_disable(struct block_device *bdev) * Set i_size to zero, since read, write, etc. check against this * value. */ - bd_set_nr_sectors(bdev, 0); + set_capacity(bdev->bd_disk, 0); dasd_put_device(base); return 0; } @@ -222,9 +220,8 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata) * enabling the device later. */ if (fdata->start_unit == 0) { - struct block_device *bdev = bdget_disk(block->gdp, 0); - bdev->bd_inode->i_blkbits = blksize_bits(fdata->blksize); - bdput(bdev); + block->gdp->part0->bd_inode->i_blkbits = + blksize_bits(fdata->blksize); } rc = base->discipline->format_device(base, fdata, 1); @@ -532,28 +529,22 @@ static int dasd_ioctl_information(struct dasd_block *block, void __user *argp, /* * Set read only */ -static int -dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp) +int dasd_set_read_only(struct block_device *bdev, bool ro) { struct dasd_device *base; - int intval, rc; + int rc; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + /* do not manipulate hardware state for partitions */ if (bdev_is_partition(bdev)) - // ro setting is not allowed for partitions - return -EINVAL; - if (get_user(intval, (int __user *)argp)) - return -EFAULT; + return 0; + base = dasd_device_from_gendisk(bdev->bd_disk); if (!base) return -ENODEV; - if (!intval && test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) { - dasd_put_device(base); - return -EROFS; - } - set_disk_ro(bdev->bd_disk, intval); - rc = dasd_set_feature(base->cdev, DASD_FEATURE_READONLY, intval); + if (!ro && test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) + rc = -EROFS; + else + rc = dasd_set_feature(base->cdev, DASD_FEATURE_READONLY, ro); dasd_put_device(base); return rc; } @@ -633,9 +624,6 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode, case BIODASDPRRST: rc = dasd_ioctl_reset_profile(block); break; - case BLKROSET: - rc = dasd_ioctl_set_ro(bdev, argp); - break; case DASDAPIVER: rc = dasd_ioctl_api_version(argp); break; diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index dfcbe54591fb..8d0de6adcad0 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -384,6 +384,20 @@ static ssize_t chp_chid_external_show(struct device *dev, } static DEVICE_ATTR(chid_external, 0444, chp_chid_external_show, NULL); +static ssize_t chp_esc_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct channel_path *chp = to_channelpath(dev); + ssize_t rc; + + mutex_lock(&chp->lock); + rc = sprintf(buf, "%x\n", chp->desc_fmt1.esc); + mutex_unlock(&chp->lock); + + return rc; +} +static DEVICE_ATTR(esc, 0444, chp_esc_show, NULL); + static ssize_t util_string_read(struct file *filp, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) @@ -414,6 +428,7 @@ static struct attribute *chp_attrs[] = { &dev_attr_shared.attr, &dev_attr_chid.attr, &dev_attr_chid_external.attr, + &dev_attr_esc.attr, NULL, }; static struct attribute_group chp_attr_group = { diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h index 20259f3fbf45..7ee9eba0abcb 100644 --- a/drivers/s390/cio/chp.h +++ b/drivers/s390/cio/chp.h @@ -23,6 +23,7 @@ #define CHP_OFFLINE 1 #define CHP_VARY_ON 2 #define CHP_VARY_OFF 3 +#define CHP_FCES_EVENT 4 struct chp_link { struct chp_id chpid; diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index fc06a4002168..c22d9ee27ba1 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -37,6 +37,9 @@ static void *sei_page; static void *chsc_page; static DEFINE_SPINLOCK(chsc_page_lock); +#define SEI_VF_FLA 0xc0 /* VF flag for Full Link Address */ +#define SEI_RS_CHPID 0x4 /* 4 in RS field indicates CHPID */ + /** * chsc_error_from_response() - convert a chsc response to an error * @response: chsc response code @@ -287,6 +290,15 @@ static void s390_process_res_acc(struct chp_link *link) css_schedule_reprobe(); } +static int process_fces_event(struct subchannel *sch, void *data) +{ + spin_lock_irq(sch->lock); + if (sch->driver && sch->driver->chp_event) + sch->driver->chp_event(sch, data, CHP_FCES_EVENT); + spin_unlock_irq(sch->lock); + return 0; +} + struct chsc_sei_nt0_area { u8 flags; u8 vf; /* validity flags */ @@ -364,6 +376,16 @@ static char *store_ebcdic(char *dest, const char *src, unsigned long len, return dest + len; } +static void chsc_link_from_sei(struct chp_link *link, + struct chsc_sei_nt0_area *sei_area) +{ + if ((sei_area->vf & SEI_VF_FLA) != 0) { + link->fla = sei_area->fla; + link->fla_mask = ((sei_area->vf & SEI_VF_FLA) == SEI_VF_FLA) ? + 0xffff : 0xff00; + } +} + /* Format node ID and parameters for output in LIR log message. */ static void format_node_data(char *params, char *id, struct node_descriptor *nd) { @@ -453,15 +475,7 @@ static void chsc_process_sei_res_acc(struct chsc_sei_nt0_area *sei_area) } memset(&link, 0, sizeof(struct chp_link)); link.chpid = chpid; - if ((sei_area->vf & 0xc0) != 0) { - link.fla = sei_area->fla; - if ((sei_area->vf & 0xc0) == 0xc0) - /* full link address */ - link.fla_mask = 0xffff; - else - /* link address */ - link.fla_mask = 0xff00; - } + chsc_link_from_sei(&link, sei_area); s390_process_res_acc(&link); } @@ -570,6 +584,33 @@ static void chsc_process_sei_ap_cfg_chg(struct chsc_sei_nt0_area *sei_area) ap_bus_cfg_chg(); } +static void chsc_process_sei_fces_event(struct chsc_sei_nt0_area *sei_area) +{ + struct chp_link link; + struct chp_id chpid; + struct channel_path *chp; + + CIO_CRW_EVENT(4, + "chsc: FCES status notification (rs=%02x, rs_id=%04x, FCES-status=%x)\n", + sei_area->rs, sei_area->rsid, sei_area->ccdf[0]); + + if (sei_area->rs != SEI_RS_CHPID) + return; + chp_id_init(&chpid); + chpid.id = sei_area->rsid; + + /* Ignore the event on unknown/invalid chp */ + chp = chpid_to_chp(chpid); + if (!chp) + return; + + memset(&link, 0, sizeof(struct chp_link)); + link.chpid = chpid; + chsc_link_from_sei(&link, sei_area); + + for_each_subchannel_staged(process_fces_event, NULL, &link); +} + static void chsc_process_sei_nt2(struct chsc_sei_nt2_area *sei_area) { switch (sei_area->cc) { @@ -611,6 +652,9 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area) case 14: /* scm available notification */ chsc_process_sei_scm_avail(sei_area); break; + case 15: /* FCES event notification */ + chsc_process_sei_fces_event(sei_area); + break; default: /* other stuff */ CIO_CRW_EVENT(2, "chsc: sei nt0 unhandled cc=%d\n", sei_area->cc); @@ -1428,3 +1472,86 @@ int chsc_sgib(u32 origin) return ret; } EXPORT_SYMBOL_GPL(chsc_sgib); + +#define SCUD_REQ_LEN 0x10 /* SCUD request block length */ +#define SCUD_REQ_CMD 0x4b /* SCUD Command Code */ + +struct chse_cudb { + u16 flags:8; + u16 chp_valid:8; + u16 cu; + u32 esm_valid:8; + u32:24; + u8 chpid[8]; + u32:32; + u32:32; + u8 esm[8]; + u32 efla[8]; +} __packed; + +struct chsc_scud { + struct chsc_header request; + u16:4; + u16 fmt:4; + u16 cssid:8; + u16 first_cu; + u16:16; + u16 last_cu; + u32:32; + struct chsc_header response; + u16:4; + u16 fmt_resp:4; + u32:24; + struct chse_cudb cudb[]; +} __packed; + +/** + * chsc_scud() - Store control-unit description. + * @cu: number of the control-unit + * @esm: 8 1-byte endpoint security mode values + * @esm_valid: validity mask for @esm + * + * Interface to retrieve information about the endpoint security + * modes for up to 8 paths of a control unit. + * + * Returns 0 on success. + */ +int chsc_scud(u16 cu, u64 *esm, u8 *esm_valid) +{ + struct chsc_scud *scud = chsc_page; + int ret; + + spin_lock_irq(&chsc_page_lock); + memset(chsc_page, 0, PAGE_SIZE); + scud->request.length = SCUD_REQ_LEN; + scud->request.code = SCUD_REQ_CMD; + scud->fmt = 0; + scud->cssid = 0; + scud->first_cu = cu; + scud->last_cu = cu; + + ret = chsc(scud); + if (!ret) + ret = chsc_error_from_response(scud->response.code); + + if (!ret && (scud->response.length <= 8 || scud->fmt_resp != 0 + || !(scud->cudb[0].flags & 0x80) + || scud->cudb[0].cu != cu)) { + + CIO_MSG_EVENT(2, "chsc: scud failed rc=%04x, L2=%04x " + "FMT=%04x, cudb.flags=%02x, cudb.cu=%04x", + scud->response.code, scud->response.length, + scud->fmt_resp, scud->cudb[0].flags, scud->cudb[0].cu); + ret = -EINVAL; + } + + if (ret) + goto out; + + memcpy(esm, scud->cudb[0].esm, sizeof(*esm)); + *esm_valid = scud->cudb[0].esm_valid; +out: + spin_unlock_irq(&chsc_page_lock); + return ret; +} +EXPORT_SYMBOL_GPL(chsc_scud); diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index c2b83b68bc57..32fa7faa5bf6 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -27,7 +27,8 @@ struct channel_path_desc_fmt1 { u8 lsn; u8 desc; u8 chpid; - u32:24; + u32:16; + u8 esc; u8 chpp; u32 unused[2]; u16 chid; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e0005a4fc978..36583dc8406c 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1156,7 +1156,8 @@ static int io_subchannel_chp_event(struct subchannel *sch, struct chp_link *link, int event) { struct ccw_device *cdev = sch_get_cdev(sch); - int mask; + int mask, chpid, valid_bit; + int path_event[8]; mask = chp_ssd_get_mask(&sch->ssd_info, link); if (!mask) @@ -1191,6 +1192,18 @@ static int io_subchannel_chp_event(struct subchannel *sch, cdev->private->path_new_mask |= mask; io_subchannel_verify(sch); break; + case CHP_FCES_EVENT: + /* Forward Endpoint Security event */ + for (chpid = 0, valid_bit = 0x80; chpid < 8; chpid++, + valid_bit >>= 1) { + if (mask & valid_bit) + path_event[chpid] = PE_PATH_FCES_EVENT; + else + path_event[chpid] = PE_NONE; + } + if (cdev) + cdev->drv->path_event(cdev, path_event); + break; } return 0; } diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 6cb963a06777..37d450f46952 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -2359,8 +2359,7 @@ static void zfcp_fsf_req_trace(struct zfcp_fsf_req *req, struct scsi_cmnd *scsi) } } - blk_add_driver_data(scsi->request->q, scsi->request, &blktrc, - sizeof(blktrc)); + blk_add_driver_data(scsi->request, &blktrc, sizeof(blktrc)); } /** diff --git a/drivers/scsi/scsicam.c b/drivers/scsi/scsicam.c index 682cf08ab041..f1553a453616 100644 --- a/drivers/scsi/scsicam.c +++ b/drivers/scsi/scsicam.c @@ -32,7 +32,7 @@ */ unsigned char *scsi_bios_ptable(struct block_device *dev) { - struct address_space *mapping = dev->bd_contains->bd_inode->i_mapping; + struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping; unsigned char *res = NULL; struct page *page; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 656bcf4940d6..679c2c025047 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -630,13 +630,11 @@ static struct scsi_driver sd_template = { }; /* - * Dummy kobj_map->probe function. - * The default ->probe function will call modprobe, which is - * pointless as this module is already loaded. + * Don't request a new module, as that could deadlock in multipath + * environment. */ -static struct kobject *sd_default_probe(dev_t devt, int *partno, void *data) +static void sd_default_probe(dev_t devt) { - return NULL; } /* @@ -1750,10 +1748,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) static void sd_rescan(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - int ret; - ret = sd_revalidate_disk(sdkp->disk); - revalidate_disk_size(sdkp->disk, ret == 0); + sd_revalidate_disk(sdkp->disk); } static int sd_ioctl(struct block_device *bdev, fmode_t mode, @@ -3265,8 +3261,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sdkp->first_scan = 0; - set_capacity_revalidate_and_notify(disk, - logical_to_sectors(sdp, sdkp->capacity), false); + set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); sd_config_write_same(sdkp); kfree(buffer); @@ -3276,7 +3271,7 @@ static int sd_revalidate_disk(struct gendisk *disk) * capacity to 0. */ if (sd_zbc_revalidate_zones(sdkp)) - set_capacity_revalidate_and_notify(disk, 0, false); + set_capacity_and_notify(disk, 0); out: return 0; @@ -3528,9 +3523,6 @@ static int sd_remove(struct device *dev) free_opal_dev(sdkp->opal_dev); - blk_register_region(devt, SD_MINORS, NULL, - sd_default_probe, NULL, NULL); - mutex_lock(&sd_ref_mutex); dev_set_drvdata(dev, NULL); put_device(&sdkp->dev); @@ -3720,11 +3712,9 @@ static int __init init_sd(void) SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); for (i = 0; i < SD_MAJORS; i++) { - if (register_blkdev(sd_major(i), "sd") != 0) + if (__register_blkdev(sd_major(i), "sd", sd_default_probe)) continue; majors++; - blk_register_region(sd_major(i), SD_MINORS, NULL, - sd_default_probe, NULL, NULL); } if (!majors) @@ -3797,10 +3787,8 @@ static void __exit exit_sd(void) class_unregister(&sd_disk_class); - for (i = 0; i < SD_MAJORS; i++) { - blk_unregister_region(sd_major(i), SD_MINORS); + for (i = 0; i < SD_MAJORS; i++) unregister_blkdev(sd_major(i), "sd"); - } } module_init(init_sd); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index fd4b582110b2..e4633b84c556 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -416,19 +416,7 @@ static blk_status_t sr_init_command(struct scsi_cmnd *SCpnt) goto out; } - /* - * we do lazy blocksize switching (when reading XA sectors, - * see CDROMREADMODE2 ioctl) - */ s_size = cd->device->sector_size; - if (s_size > 2048) { - if (!in_interrupt()) - sr_set_blocklength(cd, 2048); - else - scmd_printk(KERN_INFO, SCpnt, - "can't switch blocksize: in interrupt\n"); - } - if (s_size != 512 && s_size != 1024 && s_size != 2048) { scmd_printk(KERN_ERR, SCpnt, "bad sector size %d\n", s_size); goto out; @@ -701,11 +689,6 @@ static int sr_open(struct cdrom_device_info *cdi, int purpose) static void sr_release(struct cdrom_device_info *cdi) { - struct scsi_cd *cd = cdi->handle; - - if (cd->device->sector_size > 2048) - sr_set_blocklength(cd, 2048); - } static int sr_probe(struct device *dev) diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index ffcf902da390..5703f8400b73 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -549,6 +549,8 @@ static int sr_read_sector(Scsi_CD *cd, int lba, int blksize, unsigned char *dest cgc.timeout = IOCTL_TIMEOUT; rc = sr_do_ioctl(cd, &cgc); + if (blksize != CD_FRAMESIZE) + rc |= sr_set_blocklength(cd, CD_FRAMESIZE); return rc; } diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ded00a89bfc4..2e4fa77445fd 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -399,6 +399,7 @@ static int storvsc_timeout = 180; static struct scsi_transport_template *fc_transport_template; #endif +static struct scsi_host_template scsi_driver; static void storvsc_on_channel_callback(void *context); #define STORVSC_MAX_LUNS_PER_TARGET 255 @@ -698,6 +699,12 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) memset(&props, 0, sizeof(struct vmstorage_channel_properties)); + /* + * The size of vmbus_requestor is an upper bound on the number of requests + * that can be in-progress at any one time across all channels. + */ + new_sc->rqstor_size = scsi_driver.can_queue; + ret = vmbus_open(new_sc, storvsc_ringbuffer_size, storvsc_ringbuffer_size, @@ -1242,9 +1249,17 @@ static void storvsc_on_channel_callback(void *context) foreach_vmbus_pkt(desc, channel) { void *packet = hv_pkt_data(desc); struct storvsc_cmd_request *request; + u64 cmd_rqst; - request = (struct storvsc_cmd_request *) - ((unsigned long)desc->trans_id); + cmd_rqst = vmbus_request_addr(&channel->requestor, + desc->trans_id); + if (cmd_rqst == VMBUS_RQST_ERROR) { + dev_err(&device->device, + "Incorrect transaction id\n"); + continue; + } + + request = (struct storvsc_cmd_request *)(unsigned long)cmd_rqst; if (request == &stor_device->init_request || request == &stor_device->reset_request) { @@ -1265,6 +1280,12 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, memset(&props, 0, sizeof(struct vmstorage_channel_properties)); + /* + * The size of vmbus_requestor is an upper bound on the number of requests + * that can be in-progress at any one time across all channels. + */ + device->channel->rqstor_size = scsi_driver.can_queue; + ret = vmbus_open(device->channel, ring_size, ring_size, @@ -1572,7 +1593,6 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) struct vstor_packet *vstor_packet; int ret, t; - stor_device = get_out_stor_device(device); if (!stor_device) return FAILED; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 7143d03f0e02..b0cb5b95e892 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -133,10 +133,10 @@ static int fd_configure_device(struct se_device *dev) */ inode = file->f_mapping->host; if (S_ISBLK(inode->i_mode)) { - struct request_queue *q = bdev_get_queue(inode->i_bdev); + struct request_queue *q = bdev_get_queue(I_BDEV(inode)); unsigned long long dev_size; - fd_dev->fd_block_size = bdev_logical_block_size(inode->i_bdev); + fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode)); /* * Determine the number of bytes from i_size_read() minus * one (1) logical sector from underlying struct block_device @@ -559,7 +559,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) if (S_ISBLK(inode->i_mode)) { /* The backend is block device, use discard */ - struct block_device *bdev = inode->i_bdev; + struct block_device *bdev = I_BDEV(inode); struct se_device *dev = cmd->se_dev; ret = blkdev_issue_discard(bdev, diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 4e37fa9b409d..7994f27e4527 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1029,9 +1029,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev) { struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); - if (pdv->pdv_bd && pdv->pdv_bd->bd_part) - return pdv->pdv_bd->bd_part->nr_sects; - + if (pdv->pdv_bd) + return bdev_nr_sectors(pdv->pdv_bd); return 0; } diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index f7e6c42558eb..b859a158a414 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -204,7 +204,7 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_WRITE)) ro = 1; - inode = file_inode(filp); + inode = filp->f_mapping->host; if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { LINFO(curlun, "invalid file type: %s\n", filename); goto out; @@ -221,7 +221,7 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (!(filp->f_mode & FMODE_CAN_WRITE)) ro = 1; - size = i_size_read(inode->i_mapping->host); + size = i_size_read(inode); if (size < 0) { LINFO(curlun, "unable to find file size: %s\n", filename); rc = (int) size; @@ -231,8 +231,8 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename) if (curlun->cdrom) { blksize = 2048; blkbits = 11; - } else if (inode->i_bdev) { - blksize = bdev_logical_block_size(inode->i_bdev); + } else if (S_ISBLK(inode->i_mode)) { + blksize = bdev_logical_block_size(I_BDEV(inode)); blkbits = blksize_bits(blksize); } else { blksize = 512; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 4b99ec3dec58..e7c692cfb2cf 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -689,7 +689,7 @@ static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, /* watch the backend node for backend configuration information */ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, - xen_pcibk_be_watch); + NULL, xen_pcibk_be_watch); if (err) goto out; diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 5f5b8a7d5b80..2a93b7c9c159 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -44,6 +44,8 @@ struct xen_bus_type { int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(struct xen_bus_type *bus, const char *type, const char *dir); + bool (*otherend_will_handle)(struct xenbus_watch *watch, + const char *path, const char *token); void (*otherend_changed)(struct xenbus_watch *watch, const char *path, const char *token); struct bus_type bus; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index fd80e318b99c..0cd728961fce 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -127,18 +127,22 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *)) { int err; watch->node = path; + watch->will_handle = will_handle; watch->callback = callback; err = register_xenbus_watch(watch); if (err) { watch->node = NULL; + watch->will_handle = NULL; watch->callback = NULL; xenbus_dev_fatal(dev, err, "adding watch on %s", path); } @@ -165,6 +169,8 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); */ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *), const char *pathfmt, ...) @@ -181,7 +187,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); return -ENOMEM; } - err = xenbus_watch_path(dev, path, watch, callback); + err = xenbus_watch_path(dev, path, watch, will_handle, callback); if (err) kfree(path); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 38725d97d909..44634d970a5c 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -136,6 +136,7 @@ static int watch_otherend(struct xenbus_device *dev) container_of(dev->dev.bus, struct xen_bus_type, bus); return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_will_handle, bus->otherend_changed, "%s/%s", dev->otherend, "state"); } diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 2ba699897e6d..5abded97e1a7 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -180,6 +180,12 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, return err; } +static bool frontend_will_handle(struct xenbus_watch *watch, + const char *path, const char *token) +{ + return watch->nr_pending == 0; +} + static void frontend_changed(struct xenbus_watch *watch, const char *path, const char *token) { @@ -191,6 +197,7 @@ static struct xen_bus_type xenbus_backend = { .levels = 3, /* backend/type// */ .get_bus_id = backend_bus_id, .probe = xenbus_probe_backend, + .otherend_will_handle = frontend_will_handle, .otherend_changed = frontend_changed, .bus = { .name = "xen-backend", diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3a06eb699f33..12e02eb01f59 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -705,9 +705,13 @@ int xs_watch_msg(struct xs_watch_event *event) spin_lock(&watches_lock); event->handle = find_watch(event->token); - if (event->handle != NULL) { + if (event->handle != NULL && + (!event->handle->will_handle || + event->handle->will_handle(event->handle, + event->path, event->token))) { spin_lock(&watch_events_lock); list_add_tail(&event->list, &watch_events); + event->handle->nr_pending++; wake_up(&watch_events_waitq); spin_unlock(&watch_events_lock); } else @@ -765,6 +769,8 @@ int register_xenbus_watch(struct xenbus_watch *watch) sprintf(token, "%lX", (long)watch); + watch->nr_pending = 0; + down_read(&xs_watch_rwsem); spin_lock(&watches_lock); @@ -814,11 +820,14 @@ void unregister_xenbus_watch(struct xenbus_watch *watch) /* Cancel pending watch events. */ spin_lock(&watch_events_lock); - list_for_each_entry_safe(event, tmp, &watch_events, list) { - if (event->handle != watch) - continue; - list_del(&event->list); - kfree(event); + if (watch->nr_pending) { + list_for_each_entry_safe(event, tmp, &watch_events, list) { + if (event->handle != watch) + continue; + list_del(&event->list); + kfree(event); + } + watch->nr_pending = 0; } spin_unlock(&watch_events_lock); @@ -865,7 +874,6 @@ void xs_suspend_cancel(void) static int xenwatch_thread(void *unused) { - struct list_head *ent; struct xs_watch_event *event; xenwatch_pid = current->pid; @@ -880,13 +888,15 @@ static int xenwatch_thread(void *unused) mutex_lock(&xenwatch_mutex); spin_lock(&watch_events_lock); - ent = watch_events.next; - if (ent != &watch_events) - list_del(ent); + event = list_first_entry_or_null(&watch_events, + struct xs_watch_event, list); + if (event) { + list_del(&event->list); + event->handle->nr_pending--; + } spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - event = list_entry(ent, struct xs_watch_event, list); + if (event) { event->handle->callback(event->handle, event->path, event->token); kfree(event); diff --git a/fs/block_dev.c b/fs/block_dev.c index 9e84b1928b94..9e56ee1f2652 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -110,24 +111,20 @@ EXPORT_SYMBOL(invalidate_bdev); int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, loff_t lend) { - struct block_device *claimed_bdev = NULL; - int err; - /* * If we don't hold exclusive handle for the device, upgrade to it * while we discard the buffer cache to avoid discarding buffers * under live filesystem. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bdev->bd_contains; - err = bd_prepare_to_claim(bdev, claimed_bdev, - truncate_bdev_range); + int err = bd_prepare_to_claim(bdev, truncate_bdev_range); if (err) return err; } + truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); - if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range); + if (!(mode & FMODE_EXCL)) + bd_abort_claiming(bdev, truncate_bdev_range); return 0; } EXPORT_SYMBOL(truncate_bdev_range); @@ -548,55 +545,47 @@ EXPORT_SYMBOL(fsync_bdev); * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze * actually. */ -struct super_block *freeze_bdev(struct block_device *bdev) +int freeze_bdev(struct block_device *bdev) { struct super_block *sb; int error = 0; mutex_lock(&bdev->bd_fsfreeze_mutex); - if (++bdev->bd_fsfreeze_count > 1) { - /* - * We don't even need to grab a reference - the first call - * to freeze_bdev grab an active reference and only the last - * thaw_bdev drops it. - */ - sb = get_super(bdev); - if (sb) - drop_super(sb); - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; - } + if (++bdev->bd_fsfreeze_count > 1) + goto done; sb = get_active_super(bdev); if (!sb) - goto out; + goto sync; if (sb->s_op->freeze_super) error = sb->s_op->freeze_super(sb); else error = freeze_super(sb); - if (error) { - deactivate_super(sb); - bdev->bd_fsfreeze_count--; - mutex_unlock(&bdev->bd_fsfreeze_mutex); - return ERR_PTR(error); - } deactivate_super(sb); - out: + + if (error) { + bdev->bd_fsfreeze_count--; + goto done; + } + bdev->bd_fsfreeze_sb = sb; + +sync: sync_blockdev(bdev); +done: mutex_unlock(&bdev->bd_fsfreeze_mutex); - return sb; /* thaw_bdev releases s->s_umount */ + return error; } EXPORT_SYMBOL(freeze_bdev); /** * thaw_bdev -- unlock filesystem * @bdev: blockdevice to unlock - * @sb: associated superblock * * Unlocks the filesystem and marks it writeable again after freeze_bdev(). */ -int thaw_bdev(struct block_device *bdev, struct super_block *sb) +int thaw_bdev(struct block_device *bdev) { + struct super_block *sb; int error = -EINVAL; mutex_lock(&bdev->bd_fsfreeze_mutex); @@ -607,6 +596,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb) if (--bdev->bd_fsfreeze_count > 0) goto out; + sb = bdev->bd_fsfreeze_sb; if (!sb) goto out; @@ -792,23 +782,19 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) static void bdev_free_inode(struct inode *inode) { + struct block_device *bdev = I_BDEV(inode); + + free_percpu(bdev->bd_stats); + kfree(bdev->bd_meta_info); + kmem_cache_free(bdev_cachep, BDEV_I(inode)); } -static void init_once(void *foo) +static void init_once(void *data) { - struct bdev_inode *ei = (struct bdev_inode *) foo; - struct block_device *bdev = &ei->bdev; + struct bdev_inode *ei = data; - memset(bdev, 0, sizeof(*bdev)); - mutex_init(&bdev->bd_mutex); -#ifdef CONFIG_SYSFS - INIT_LIST_HEAD(&bdev->bd_holder_disks); -#endif - bdev->bd_bdi = &noop_backing_dev_info; inode_init_once(&ei->vfs_inode); - /* Initialize mutex for freeze. */ - mutex_init(&bdev->bd_fsfreeze_mutex); } static void bdev_evict_inode(struct inode *inode) @@ -870,72 +856,72 @@ void __init bdev_cache_init(void) blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } -/* - * Most likely _very_ bad one - but then it's hardly critical for small - * /dev and can be fixed when somebody will need really large one. - * Keep in mind that it will be fed through icache hash function too. - */ -static inline unsigned long hash(dev_t dev) -{ - return MAJOR(dev)+MINOR(dev); -} - -static int bdev_test(struct inode *inode, void *data) -{ - return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; -} - -static int bdev_set(struct inode *inode, void *data) -{ - BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; - return 0; -} - -static struct block_device *bdget(dev_t dev) +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) { struct block_device *bdev; struct inode *inode; - inode = iget5_locked(blockdev_superblock, hash(dev), - bdev_test, bdev_set, &dev); - + inode = new_inode(blockdev_superblock); if (!inode) return NULL; + inode->i_mode = S_IFBLK; + inode->i_rdev = 0; + inode->i_data.a_ops = &def_blk_aops; + mapping_set_gfp_mask(&inode->i_data, GFP_USER); - bdev = &BDEV_I(inode)->bdev; - - if (inode->i_state & I_NEW) { - spin_lock_init(&bdev->bd_size_lock); - bdev->bd_contains = NULL; - bdev->bd_super = NULL; - bdev->bd_inode = inode; - bdev->bd_part_count = 0; - inode->i_mode = S_IFBLK; - inode->i_rdev = dev; - inode->i_bdev = bdev; - inode->i_data.a_ops = &def_blk_aops; - mapping_set_gfp_mask(&inode->i_data, GFP_USER); - unlock_new_inode(inode); + bdev = I_BDEV(inode); + memset(bdev, 0, sizeof(*bdev)); + mutex_init(&bdev->bd_mutex); + mutex_init(&bdev->bd_fsfreeze_mutex); + spin_lock_init(&bdev->bd_size_lock); + bdev->bd_disk = disk; + bdev->bd_partno = partno; + bdev->bd_inode = inode; + bdev->bd_bdi = &noop_backing_dev_info; +#ifdef CONFIG_SYSFS + INIT_LIST_HEAD(&bdev->bd_holder_disks); +#endif + bdev->bd_stats = alloc_percpu(struct disk_stats); + if (!bdev->bd_stats) { + iput(inode); + return NULL; } return bdev; } +void bdev_add(struct block_device *bdev, dev_t dev) +{ + bdev->bd_dev = dev; + bdev->bd_inode->i_rdev = dev; + bdev->bd_inode->i_ino = dev; + insert_inode_hash(bdev->bd_inode); +} + +static struct block_device *bdget(dev_t dev) +{ + struct inode *inode; + + inode = ilookup(blockdev_superblock, dev); + if (!inode) + return NULL; + return &BDEV_I(inode)->bdev; +} + /** * bdgrab -- Grab a reference to an already referenced block device * @bdev: Block device to grab a reference to. + * + * Returns the block_device with an additional reference when successful, + * or NULL if the inode is already beeing freed. */ struct block_device *bdgrab(struct block_device *bdev) { - ihold(bdev->bd_inode); + if (!igrab(bdev->bd_inode)) + return NULL; return bdev; } EXPORT_SYMBOL(bdgrab); -struct block_device *bdget_part(struct hd_struct *part) -{ - return bdget(part_devt(part)); -} - long nr_blockdev_pages(void) { struct inode *inode; @@ -953,67 +939,8 @@ void bdput(struct block_device *bdev) { iput(bdev->bd_inode); } - EXPORT_SYMBOL(bdput); -static struct block_device *bd_acquire(struct inode *inode) -{ - struct block_device *bdev; - - spin_lock(&bdev_lock); - bdev = inode->i_bdev; - if (bdev && !inode_unhashed(bdev->bd_inode)) { - bdgrab(bdev); - spin_unlock(&bdev_lock); - return bdev; - } - spin_unlock(&bdev_lock); - - /* - * i_bdev references block device inode that was already shut down - * (corresponding device got removed). Remove the reference and look - * up block device inode again just in case new device got - * reestablished under the same device number. - */ - if (bdev) - bd_forget(inode); - - bdev = bdget(inode->i_rdev); - if (bdev) { - spin_lock(&bdev_lock); - if (!inode->i_bdev) { - /* - * We take an additional reference to bd_inode, - * and it's released in clear_inode() of inode. - * So, we can access it via ->i_mapping always - * without igrab(). - */ - bdgrab(bdev); - inode->i_bdev = bdev; - inode->i_mapping = bdev->bd_inode->i_mapping; - } - spin_unlock(&bdev_lock); - } - return bdev; -} - -/* Call when you free inode */ - -void bd_forget(struct inode *inode) -{ - struct block_device *bdev = NULL; - - spin_lock(&bdev_lock); - if (!sb_is_blkdev_sb(inode->i_sb)) - bdev = inode->i_bdev; - inode->i_bdev = NULL; - inode->i_mapping = &inode->i_data; - spin_unlock(&bdev_lock); - - if (bdev) - bdput(bdev); -} - /** * bd_may_claim - test whether a block device can be claimed * @bdev: block device of interest @@ -1049,7 +976,6 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, /** * bd_prepare_to_claim - claim a block device * @bdev: block device of interest - * @whole: the whole device containing @bdev, may equal @bdev * @holder: holder trying to claim @bdev * * Claim @bdev. This function fails if @bdev is already claimed by another @@ -1059,9 +985,12 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole, * RETURNS: * 0 if @bdev can be claimed, -EBUSY otherwise. */ -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder) +int bd_prepare_to_claim(struct block_device *bdev, void *holder) { + struct block_device *whole = bdev_whole(bdev); + + if (WARN_ON_ONCE(!holder)) + return -EINVAL; retry: spin_lock(&bdev_lock); /* if someone else claimed, fail */ @@ -1089,27 +1018,6 @@ int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, } EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */ -static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) -{ - struct gendisk *disk = get_gendisk(bdev->bd_dev, partno); - - if (!disk) - return NULL; - /* - * Now that we hold gendisk reference we make sure bdev we looked up is - * not stale. If it is, it means device got removed and created before - * we looked up gendisk and we fail open in such case. Associating - * unhashed bdev with newly created gendisk could lead to two bdevs - * (and thus two independent caches) being associated with one device - * which is bad. - */ - if (inode_unhashed(bdev->bd_inode)) { - put_disk_and_module(disk); - return NULL; - } - return disk; -} - static void bd_clear_claiming(struct block_device *whole, void *holder) { lockdep_assert_held(&bdev_lock); @@ -1122,15 +1030,15 @@ static void bd_clear_claiming(struct block_device *whole, void *holder) /** * bd_finish_claiming - finish claiming of a block device * @bdev: block device of interest - * @whole: whole block device * @holder: holder that has claimed @bdev * * Finish exclusive open of a block device. Mark the device as exlusively * open by the holder and wake up all waiters for exclusive open to finish. */ -static void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder) +static void bd_finish_claiming(struct block_device *bdev, void *holder) { + struct block_device *whole = bdev_whole(bdev); + spin_lock(&bdev_lock); BUG_ON(!bd_may_claim(bdev, whole, holder)); /* @@ -1155,11 +1063,10 @@ static void bd_finish_claiming(struct block_device *bdev, * also used when exclusive open is not actually desired and we just needed * to block other exclusive openers for a while. */ -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder) +void bd_abort_claiming(struct block_device *bdev, void *holder) { spin_lock(&bdev_lock); - bd_clear_claiming(whole, holder); + bd_clear_claiming(bdev_whole(bdev), holder); spin_unlock(&bdev_lock); } EXPORT_SYMBOL(bd_abort_claiming); @@ -1230,7 +1137,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) WARN_ON_ONCE(!bdev->bd_holder); /* FIXME: remove the following once add_disk() handles errors */ - if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) + if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir)) goto out_unlock; holder = bd_find_holder_disk(bdev, disk); @@ -1249,24 +1156,24 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) holder->disk = disk; holder->refcnt = 1; - ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + ret = add_symlink(disk->slave_dir, bdev_kobj(bdev)); if (ret) goto out_free; - ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); + ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); if (ret) goto out_del; /* * bdev could be deleted beneath us which would implicitly destroy * the holder directory. Hold on to it. */ - kobject_get(bdev->bd_part->holder_dir); + kobject_get(bdev->bd_holder_dir); list_add(&holder->list, &bdev->bd_holder_disks); goto out_unlock; out_del: - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); out_free: kfree(holder); out_unlock: @@ -1294,10 +1201,9 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) holder = bd_find_holder_disk(bdev, disk); if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); - del_symlink(bdev->bd_part->holder_dir, - &disk_to_dev(disk)->kobj); - kobject_put(bdev->bd_part->holder_dir); + del_symlink(disk->slave_dir, bdev_kobj(bdev)); + del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); + kobject_put(bdev->bd_holder_dir); list_del_init(&holder->list); kfree(holder); } @@ -1307,77 +1213,6 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif -/** - * check_disk_size_change - checks for disk size change and adjusts bdev size. - * @disk: struct gendisk to check - * @bdev: struct bdev to adjust. - * @verbose: if %true log a message about a size change if there is any - * - * This routine checks to see if the bdev size does not match the disk size - * and adjusts it if it differs. When shrinking the bdev size, its all caches - * are freed. - */ -static void check_disk_size_change(struct gendisk *disk, - struct block_device *bdev, bool verbose) -{ - loff_t disk_size, bdev_size; - - spin_lock(&bdev->bd_size_lock); - disk_size = (loff_t)get_capacity(disk) << 9; - bdev_size = i_size_read(bdev->bd_inode); - if (disk_size != bdev_size) { - if (verbose) { - printk(KERN_INFO - "%s: detected capacity change from %lld to %lld\n", - disk->disk_name, bdev_size, disk_size); - } - i_size_write(bdev->bd_inode, disk_size); - } - spin_unlock(&bdev->bd_size_lock); - - if (bdev_size > disk_size) { - if (__invalidate_device(bdev, false)) - pr_warn("VFS: busy inodes on resized disk %s\n", - disk->disk_name); - } -} - -/** - * revalidate_disk_size - checks for disk size change and adjusts bdev size. - * @disk: struct gendisk to check - * @verbose: if %true log a message about a size change if there is any - * - * This routine checks to see if the bdev size does not match the disk size - * and adjusts it if it differs. When shrinking the bdev size, its all caches - * are freed. - */ -void revalidate_disk_size(struct gendisk *disk, bool verbose) -{ - struct block_device *bdev; - - /* - * Hidden disks don't have associated bdev so there's no point in - * revalidating them. - */ - if (disk->flags & GENHD_FL_HIDDEN) - return; - - bdev = bdget_disk(disk, 0); - if (bdev) { - check_disk_size_change(disk, bdev, verbose); - bdput(bdev); - } -} -EXPORT_SYMBOL(revalidate_disk_size); - -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) -{ - spin_lock(&bdev->bd_size_lock); - i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); -} -EXPORT_SYMBOL(bd_set_nr_sectors); - static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); int bdev_disk_changed(struct block_device *bdev, bool invalidate) @@ -1411,8 +1246,6 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) disk->fops->revalidate_disk(disk); } - check_disk_size_change(disk, bdev, !invalidate); - if (get_capacity(disk)) { ret = blk_add_partitions(disk, bdev); if (ret == -EAGAIN) @@ -1439,71 +1272,19 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); * mutex_lock(part->bd_mutex) * mutex_lock_nested(whole->bd_mutex, 1) */ - -static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, - int for_part) +static int __blkdev_get(struct block_device *bdev, fmode_t mode) { - struct block_device *whole = NULL, *claiming = NULL; - struct gendisk *disk; - int ret; - int partno; - bool first_open = false, unblock_events = true, need_restart; + struct gendisk *disk = bdev->bd_disk; + int ret = 0; - restart: - need_restart = false; - ret = -ENXIO; - disk = bdev_get_gendisk(bdev, &partno); - if (!disk) - goto out; - - if (partno) { - whole = bdget_disk(disk, 0); - if (!whole) { - ret = -ENOMEM; - goto out_put_disk; - } - } - - if (!for_part && (mode & FMODE_EXCL)) { - WARN_ON_ONCE(!holder); - if (whole) - claiming = whole; - else - claiming = bdev; - ret = bd_prepare_to_claim(bdev, claiming, holder); - if (ret) - goto out_put_whole; - } - - disk_block_events(disk); - mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { - first_open = true; - bdev->bd_disk = disk; - bdev->bd_contains = bdev; - bdev->bd_partno = partno; - - if (!partno) { - ret = -ENXIO; - bdev->bd_part = disk_get_part(disk, partno); - if (!bdev->bd_part) - goto out_clear; - + if (!bdev_is_partition(bdev)) { ret = 0; - if (disk->fops->open) { + if (disk->fops->open) ret = disk->fops->open(bdev, mode); - /* - * If we lost a race with 'disk' being deleted, - * try again. See md.c - */ - if (ret == -ERESTARTSYS) - need_restart = true; - } - if (!ret) { - bd_set_nr_sectors(bdev, get_capacity(disk)); + if (!ret) set_init_blocksize(bdev); - } /* * If the device is invalidated, rescan partition @@ -1516,28 +1297,33 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) - goto out_clear; + return ret; } else { - BUG_ON(for_part); - ret = __blkdev_get(whole, mode, NULL, 1); - if (ret) - goto out_clear; - bdev->bd_contains = bdgrab(whole); - bdev->bd_part = disk_get_part(disk, partno); - if (!(disk->flags & GENHD_FL_UP) || - !bdev->bd_part || !bdev->bd_part->nr_sects) { - ret = -ENXIO; - goto out_clear; + struct block_device *whole = bdgrab(disk->part0); + + mutex_lock_nested(&whole->bd_mutex, 1); + ret = __blkdev_get(whole, mode); + if (ret) { + mutex_unlock(&whole->bd_mutex); + bdput(whole); + return ret; + } + whole->bd_part_count++; + mutex_unlock(&whole->bd_mutex); + + if (!(disk->flags & GENHD_FL_UP) || + !bdev_nr_sectors(bdev)) { + __blkdev_put(whole, mode, 1); + bdput(whole); + return -ENXIO; } - bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects); set_init_blocksize(bdev); } if (bdev->bd_bdi == &noop_backing_dev_info) bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } else { - if (bdev->bd_contains == bdev) { - ret = 0; + if (!bdev_is_partition(bdev)) { if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ @@ -1545,141 +1331,54 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) - goto out_unlock_bdev; + return ret; } } bdev->bd_openers++; - if (for_part) - bdev->bd_part_count++; - if (claiming) - bd_finish_claiming(bdev, claiming, holder); - - /* - * Block event polling for write claims if requested. Any write holder - * makes the write_holder state stick until all are released. This is - * good enough and tracking individual writeable reference is too - * fragile given the way @mode is used in blkdev_get/put(). - */ - if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder && - (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { - bdev->bd_write_holder = true; - unblock_events = false; - } - mutex_unlock(&bdev->bd_mutex); - - if (unblock_events) - disk_unblock_events(disk); - - /* only one opener holds refs to the module and disk */ - if (!first_open) - put_disk_and_module(disk); - if (whole) - bdput(whole); return 0; - - out_clear: - disk_put_part(bdev->bd_part); - bdev->bd_disk = NULL; - bdev->bd_part = NULL; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, mode, 1); - bdev->bd_contains = NULL; - out_unlock_bdev: - if (claiming) - bd_abort_claiming(bdev, claiming, holder); - mutex_unlock(&bdev->bd_mutex); - disk_unblock_events(disk); - out_put_whole: - if (whole) - bdput(whole); - out_put_disk: - put_disk_and_module(disk); - if (need_restart) - goto restart; - out: - return ret; } -/** - * blkdev_get - open a block device - * @bdev: block_device to open - * @mode: FMODE_* mask - * @holder: exclusive holder identifier - * - * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is - * open with exclusive access. Specifying %FMODE_EXCL with %NULL - * @holder is invalid. Exclusive opens may nest for the same @holder. - * - * On success, the reference count of @bdev is unchanged. On failure, - * @bdev is put. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * 0 on success, -errno on failure. - */ -static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) -{ - int ret, perm = 0; - - if (mode & FMODE_READ) - perm |= MAY_READ; - if (mode & FMODE_WRITE) - perm |= MAY_WRITE; - ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret) - goto bdput; - - ret =__blkdev_get(bdev, mode, holder, 0); - if (ret) - goto bdput; - return 0; - -bdput: - bdput(bdev); - return ret; -} - -/** - * blkdev_get_by_path - open a block device by name - * @path: path to the block device to open - * @mode: FMODE_* mask - * @holder: exclusive holder identifier - * - * Open the blockdevice described by the device file at @path. @mode - * and @holder are identical to blkdev_get(). - * - * On success, the returned block_device has reference count of one. - * - * CONTEXT: - * Might sleep. - * - * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. - */ -struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder) +struct block_device *blkdev_get_no_open(dev_t dev) { struct block_device *bdev; - int err; + struct gendisk *disk; - bdev = lookup_bdev(path); - if (IS_ERR(bdev)) - return bdev; + down_read(&bdev_lookup_sem); + bdev = bdget(dev); + if (!bdev) { + up_read(&bdev_lookup_sem); + blk_request_module(dev); + down_read(&bdev_lookup_sem); - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); - - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { - blkdev_put(bdev, mode); - return ERR_PTR(-EACCES); + bdev = bdget(dev); + if (!bdev) + goto unlock; } + disk = bdev->bd_disk; + if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) + goto bdput; + if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) + goto put_disk; + if (!try_module_get(bdev->bd_disk->fops->owner)) + goto put_disk; + up_read(&bdev_lookup_sem); return bdev; +put_disk: + put_disk(disk); +bdput: + bdput(bdev); +unlock: + up_read(&bdev_lookup_sem); + return NULL; +} + +void blkdev_put_no_open(struct block_device *bdev) +{ + module_put(bdev->bd_disk->fops->owner); + put_disk(bdev->bd_disk); + bdput(bdev); } -EXPORT_SYMBOL(blkdev_get_by_path); /** * blkdev_get_by_dev - open a block device by device number @@ -1687,38 +1386,128 @@ EXPORT_SYMBOL(blkdev_get_by_path); * @mode: FMODE_* mask * @holder: exclusive holder identifier * - * Open the blockdevice described by device number @dev. @mode and - * @holder are identical to blkdev_get(). + * Open the block device described by device number @dev. If @mode includes + * %FMODE_EXCL, the block device is opened with exclusive access. Specifying + * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for + * the same @holder. * - * Use it ONLY if you really do not have anything better - i.e. when - * you are behind a truly sucky interface and all you are given is a - * device number. _Never_ to be used for internal purposes. If you - * ever need it - reconsider your API. - * - * On success, the returned block_device has reference count of one. + * Use this interface ONLY if you really do not have anything better - i.e. when + * you are behind a truly sucky interface and all you are given is a device + * number. Everything else should use blkdev_get_by_path(). * * CONTEXT: * Might sleep. * * RETURNS: - * Pointer to block_device on success, ERR_PTR(-errno) on failure. + * Reference to the block_device on success, ERR_PTR(-errno) on failure. */ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) { + bool unblock_events = true; struct block_device *bdev; - int err; + struct gendisk *disk; + int ret; - bdev = bdget(dev); + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, + MAJOR(dev), MINOR(dev), + ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) | + ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0)); + if (ret) + return ERR_PTR(ret); + + /* + * If we lost a race with 'disk' being deleted, try again. See md.c. + */ +retry: + bdev = blkdev_get_no_open(dev); if (!bdev) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-ENXIO); + disk = bdev->bd_disk; - err = blkdev_get(bdev, mode, holder); - if (err) - return ERR_PTR(err); + if (mode & FMODE_EXCL) { + ret = bd_prepare_to_claim(bdev, holder); + if (ret) + goto put_blkdev; + } + + disk_block_events(disk); + + mutex_lock(&bdev->bd_mutex); + ret =__blkdev_get(bdev, mode); + if (ret) + goto abort_claiming; + if (mode & FMODE_EXCL) { + bd_finish_claiming(bdev, holder); + + /* + * Block event polling for write claims if requested. Any write + * holder makes the write_holder state stick until all are + * released. This is good enough and tracking individual + * writeable reference is too fragile given the way @mode is + * used in blkdev_get/put(). + */ + if ((mode & FMODE_WRITE) && !bdev->bd_write_holder && + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { + bdev->bd_write_holder = true; + unblock_events = false; + } + } + mutex_unlock(&bdev->bd_mutex); + + if (unblock_events) + disk_unblock_events(disk); + return bdev; + +abort_claiming: + if (mode & FMODE_EXCL) + bd_abort_claiming(bdev, holder); + mutex_unlock(&bdev->bd_mutex); + disk_unblock_events(disk); +put_blkdev: + blkdev_put_no_open(bdev); + if (ret == -ERESTARTSYS) + goto retry; + return ERR_PTR(ret); +} +EXPORT_SYMBOL(blkdev_get_by_dev); + +/** + * blkdev_get_by_path - open a block device by name + * @path: path to the block device to open + * @mode: FMODE_* mask + * @holder: exclusive holder identifier + * + * Open the block device described by the device file at @path. If @mode + * includes %FMODE_EXCL, the block device is opened with exclusive access. + * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may + * nest for the same @holder. + * + * CONTEXT: + * Might sleep. + * + * RETURNS: + * Reference to the block_device on success, ERR_PTR(-errno) on failure. + */ +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder) +{ + struct block_device *bdev; + dev_t dev; + int error; + + error = lookup_bdev(path, &dev); + if (error) + return ERR_PTR(error); + + bdev = blkdev_get_by_dev(dev, mode, holder); + if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { + blkdev_put(bdev, mode); + return ERR_PTR(-EACCES); + } return bdev; } -EXPORT_SYMBOL(blkdev_get_by_dev); +EXPORT_SYMBOL(blkdev_get_by_path); static int blkdev_open(struct inode * inode, struct file * filp) { @@ -1741,14 +1530,12 @@ static int blkdev_open(struct inode * inode, struct file * filp) if ((filp->f_flags & O_ACCMODE) == 3) filp->f_mode |= FMODE_WRITE_IOCTL; - bdev = bd_acquire(inode); - if (bdev == NULL) - return -ENOMEM; - + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); filp->f_mapping = bdev->bd_inode->i_mapping; filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); - - return blkdev_get(bdev, filp->f_mode, filp); + return 0; } static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) @@ -1774,34 +1561,28 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); - bdev_write_inode(bdev); + if (bdev_is_partition(bdev)) + victim = bdev_whole(bdev); } - if (bdev->bd_contains == bdev) { - if (disk->fops->release) - disk->fops->release(disk, mode); - } - if (!bdev->bd_openers) { - disk_put_part(bdev->bd_part); - bdev->bd_part = NULL; - bdev->bd_disk = NULL; - if (bdev != bdev->bd_contains) - victim = bdev->bd_contains; - bdev->bd_contains = NULL; - put_disk_and_module(disk); - } + if (!bdev_is_partition(bdev) && disk->fops->release) + disk->fops->release(disk, mode); mutex_unlock(&bdev->bd_mutex); - bdput(bdev); - if (victim) + if (victim) { __blkdev_put(victim, mode, 1); + bdput(victim); + } } void blkdev_put(struct block_device *bdev, fmode_t mode) { + struct gendisk *disk = bdev->bd_disk; + mutex_lock(&bdev->bd_mutex); if (mode & FMODE_EXCL) { + struct block_device *whole = bdev_whole(bdev); bool bdev_free; /* @@ -1812,13 +1593,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) spin_lock(&bdev_lock); WARN_ON_ONCE(--bdev->bd_holders < 0); - WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); + WARN_ON_ONCE(--whole->bd_holders < 0); - /* bd_contains might point to self, check in a separate step */ if ((bdev_free = !bdev->bd_holders)) bdev->bd_holder = NULL; - if (!bdev->bd_contains->bd_holders) - bdev->bd_contains->bd_holder = NULL; + if (!whole->bd_holders) + whole->bd_holder = NULL; spin_unlock(&bdev_lock); @@ -1827,7 +1607,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * unblock evpoll if it was a write holder. */ if (bdev_free && bdev->bd_write_holder) { - disk_unblock_events(bdev->bd_disk); + disk_unblock_events(disk); bdev->bd_write_holder = false; } } @@ -1837,11 +1617,11 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * event. This is to ensure detection of media removal commanded * from userland - e.g. eject(1). */ - disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); - + disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); mutex_unlock(&bdev->bd_mutex); __blkdev_put(bdev, mode, 0); + blkdev_put_no_open(bdev); } EXPORT_SYMBOL(blkdev_put); @@ -2054,37 +1834,32 @@ const struct file_operations def_blk_fops = { * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ -struct block_device *lookup_bdev(const char *pathname) +int lookup_bdev(const char *pathname, dev_t *dev) { - struct block_device *bdev; struct inode *inode; struct path path; int error; if (!pathname || !*pathname) - return ERR_PTR(-EINVAL); + return -EINVAL; error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) - return ERR_PTR(error); + return error; inode = d_backing_inode(path.dentry); error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) - goto fail; + goto out_path_put; error = -EACCES; if (!may_open_dev(&path)) - goto fail; - error = -ENOMEM; - bdev = bd_acquire(inode); - if (!bdev) - goto fail; -out: + goto out_path_put; + + *dev = inode->i_rdev; + error = 0; +out_path_put: path_put(&path); - return bdev; -fail: - bdev = ERR_PTR(error); - goto out; + return error; } EXPORT_SYMBOL(lookup_bdev); diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 4522a1c4cd08..19b9fffa2c9c 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1343,8 +1343,6 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info, void btrfs_sysfs_remove_device(struct btrfs_device *device) { - struct hd_struct *disk; - struct kobject *disk_kobj; struct kobject *devices_kobj; /* @@ -1354,11 +1352,8 @@ void btrfs_sysfs_remove_device(struct btrfs_device *device) devices_kobj = device->fs_info->fs_devices->devices_kobj; ASSERT(devices_kobj); - if (device->bdev) { - disk = device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; - sysfs_remove_link(devices_kobj, disk_kobj->name); - } + if (device->bdev) + sysfs_remove_link(devices_kobj, bdev_kobj(device->bdev)->name); if (device->devid_kobj.state_initialized) { kobject_del(&device->devid_kobj); @@ -1464,11 +1459,7 @@ int btrfs_sysfs_add_device(struct btrfs_device *device) nofs_flag = memalloc_nofs_save(); if (device->bdev) { - struct hd_struct *disk; - struct kobject *disk_kobj; - - disk = device->bdev->bd_part; - disk_kobj = &part_to_dev(disk)->kobj; + struct kobject *disk_kobj = bdev_kobj(device->bdev); ret = sysfs_create_link(devices_kobj, disk_kobj, disk_kobj->name); if (ret) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7930e1c78c45..ee086fc56c30 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -935,16 +935,16 @@ static noinline struct btrfs_device *device_list_add(const char *path, * make sure it's the same device if the device is mounted */ if (device->bdev) { - struct block_device *path_bdev; + int error; + dev_t path_dev; - path_bdev = lookup_bdev(path); - if (IS_ERR(path_bdev)) { + error = lookup_bdev(path, &path_dev); + if (error) { mutex_unlock(&fs_devices->device_list_mutex); - return ERR_CAST(path_bdev); + return ERR_PTR(error); } - if (device->bdev != path_bdev) { - bdput(path_bdev); + if (device->bdev->bd_dev != path_dev) { mutex_unlock(&fs_devices->device_list_mutex); /* * device->fs_info may not be reliable here, so @@ -959,7 +959,6 @@ static noinline struct btrfs_device *device_list_add(const char *path, task_pid_nr(current)); return ERR_PTR(-EEXIST); } - bdput(path_bdev); btrfs_info_in_rcu(device->fs_info, "devid %llu device path %s changed to %s scanned by %s (%d)", devid, rcu_str_deref(device->name), diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 155545180046..c38846659019 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -165,7 +165,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) if (!zone_info) return -ENOMEM; - nr_sectors = bdev->bd_part->nr_sects; + nr_sectors = bdev_nr_sectors(bdev); zone_sectors = bdev_zone_sectors(bdev); /* Check if it's power of 2 (see is_power_of_2) */ ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); @@ -505,7 +505,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, return -EINVAL; zone_size = zone_sectors << SECTOR_SHIFT; zone_sectors_shift = ilog2(zone_sectors); - nr_sectors = bdev->bd_part->nr_sects; + nr_sectors = bdev_nr_sectors(bdev); nr_zones = nr_sectors >> zone_sectors_shift; sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); @@ -603,7 +603,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) zone_sectors = bdev_zone_sectors(bdev); zone_sectors_shift = ilog2(zone_sectors); - nr_sectors = bdev->bd_part->nr_sects; + nr_sectors = bdev_nr_sectors(bdev); nr_zones = nr_sectors >> zone_sectors_shift; sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); diff --git a/fs/buffer.c b/fs/buffer.c index b56f99f82b5b..32647d2011df 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -523,7 +523,7 @@ static int osync_buffers_list(spinlock_t *lock, struct list_head *list) void emergency_thaw_bdev(struct super_block *sb) { - while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) + while (sb->s_bdev && !thaw_bdev(sb->s_bdev)) printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); } diff --git a/fs/exec.c b/fs/exec.c index c238c258afce..5d4d52039105 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -757,8 +757,8 @@ int setup_arg_pages(struct linux_binprm *bprm, #ifdef CONFIG_STACK_GROWSUP /* Limit stack size */ stack_base = bprm->rlim_stack.rlim_max; - if (stack_base > STACK_SIZE_MAX) - stack_base = STACK_SIZE_MAX; + + stack_base = calc_max_stack_size(stack_base); /* Add space for stack randomization. */ stack_base += (STACK_RND_MASK << PAGE_SHIFT); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index f0381876a7e5..524e13432447 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -624,7 +624,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) case EXT4_GOING_FLAGS_DEFAULT: freeze_bdev(sb->s_bdev); set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); - thaw_bdev(sb->s_bdev, sb); + thaw_bdev(sb->s_bdev); break; case EXT4_GOING_FLAGS_LOGFLUSH: set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 95e8557196ba..4396e1f72b33 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4044,9 +4044,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sb = sb; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sb_block = sb_block; - if (sb->s_bdev->bd_part) - sbi->s_sectors_written_start = - part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]); + sbi->s_sectors_written_start = + part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); /* Cleanup superblock name */ strreplace(sb->s_id, '/', '!'); @@ -5494,15 +5493,10 @@ static int ext4_commit_super(struct super_block *sb, int sync) */ if (!(sb->s_flags & SB_RDONLY)) ext4_update_tstamp(es, s_wtime); - if (sb->s_bdev->bd_part) - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - - EXT4_SB(sb)->s_sectors_written_start) >> 1)); - else - es->s_kbytes_written = - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); + es->s_kbytes_written = + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - + EXT4_SB(sb)->s_sectors_written_start) >> 1)); if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) ext4_free_blocks_count_set(es, EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 4e27fe6ed3ae..075aa3a19ff5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -62,11 +62,8 @@ static ssize_t session_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%lu\n", - (part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - + (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - sbi->s_sectors_written_start) >> 1); } @@ -74,12 +71,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_sb_info *sbi, char *buf) { struct super_block *sb = sbi->s_buddy_cache->i_sb; - if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); return snprintf(buf, PAGE_SIZE, "%llu\n", (unsigned long long)(sbi->s_kbytes_written + - ((part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]) - + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - EXT4_SB(sb)->s_sectors_written_start) >> 1))); } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 023462e80e58..54a1905af052 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1395,7 +1395,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u32 crc32 = 0; int i; int cp_payload_blks = __cp_payload(sbi); - struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; int err; @@ -1489,9 +1488,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk += data_sum_blocks; /* Record write statistics in the hot node summary */ - kbytes_written = sbi->kbytes_written; - if (sb->s_bdev->bd_part) - kbytes_written += BD_PART_WRITTEN(sbi); + kbytes_written = sbi->kbytes_written + BD_PART_WRITTEN(sbi); seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2675490158a9..9f62c9101215 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1678,7 +1678,7 @@ static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi) * and the return value is in kbytes. s is of struct f2fs_sb_info. */ #define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \ + (((u64)part_stat_read((s)->sb->s_bdev, sectors[STAT_WRITE]) - \ (s)->sectors_written_start) >> 1) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index ee861c6d9ff0..a9fc482a0e60 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2230,16 +2230,12 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) switch (in) { case F2FS_GOING_DOWN_FULLSYNC: - sb = freeze_bdev(sb->s_bdev); - if (IS_ERR(sb)) { - ret = PTR_ERR(sb); + ret = freeze_bdev(sb->s_bdev); + if (ret) goto out; - } - if (sb) { - f2fs_stop_checkpoint(sbi, false); - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); - thaw_bdev(sb->s_bdev, sb); - } + f2fs_stop_checkpoint(sbi, false); + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + thaw_bdev(sb->s_bdev); break; case F2FS_GOING_DOWN_METASYNC: /* do checkpoint only */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 42293b7ceaf2..bedecc06546b 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -3151,7 +3151,7 @@ static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx, static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) { struct block_device *bdev = FDEV(devi).bdev; - sector_t nr_sectors = bdev->bd_part->nr_sects; + sector_t nr_sectors = bdev_nr_sectors(bdev); struct f2fs_report_zones_args rep_zone_arg; int ret; @@ -3693,10 +3693,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) } /* For write statistics */ - if (sb->s_bdev->bd_part) - sbi->sectors_written_start = - (u64)part_stat_read(sb->s_bdev->bd_part, - sectors[STAT_WRITE]); + sbi->sectors_written_start = + (u64)part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index ec77ccfea923..24e876e849c5 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -90,11 +90,6 @@ static ssize_t free_segments_show(struct f2fs_attr *a, static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - struct super_block *sb = sbi->sb; - - if (!sb->s_bdev->bd_part) - return sprintf(buf, "0\n"); - return sprintf(buf, "%llu\n", (unsigned long long)(sbi->kbytes_written + BD_PART_WRITTEN(sbi))); @@ -103,12 +98,8 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, static ssize_t features_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - struct super_block *sb = sbi->sb; int len = 0; - if (!sb->s_bdev->bd_part) - return sprintf(buf, "0\n"); - if (f2fs_sb_has_encrypt(sbi)) len += scnprintf(buf, PAGE_SIZE - len, "%s", "encryption"); diff --git a/fs/inode.c b/fs/inode.c index 9d78c37b00b8..cb008acf0efd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -155,7 +155,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_bytes = 0; inode->i_generation = 0; inode->i_pipe = NULL; - inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_link = NULL; inode->i_dir_seq = 0; @@ -580,8 +579,6 @@ static void evict(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); } - if (S_ISBLK(inode->i_mode) && inode->i_bdev) - bd_forget(inode); if (S_ISCHR(inode->i_mode) && inode->i_cdev) cd_forget(inode); diff --git a/fs/internal.h b/fs/internal.h index a7cd0f64faa4..77c50befbfbe 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -25,7 +25,6 @@ extern void __init bdev_cache_init(void); extern int __sync_blockdev(struct block_device *bdev, int wait); void iterate_bdevs(void (*)(struct block_device *, void *), void *); void emergency_thaw_bdev(struct super_block *sb); -void bd_forget(struct inode *inode); #else static inline void bdev_cache_init(void) { @@ -43,9 +42,6 @@ static inline int emergency_thaw_bdev(struct super_block *sb) { return 0; } -static inline void bd_forget(struct inode *inode) -{ -} #endif /* CONFIG_BLOCK */ /* @@ -78,6 +74,8 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *, long do_rmdir(int dfd, struct filename *name); long do_unlinkat(int dfd, struct filename *name); int may_linkat(struct path *link); +int do_renameat2(int olddfd, struct filename *oldname, int newdfd, + struct filename *newname, unsigned int flags); /* * namespace.c @@ -114,7 +112,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); */ extern int reconfigure_super(struct fs_context *); extern bool trylock_super(struct super_block *sb); -extern struct super_block *user_get_super(dev_t); +struct super_block *user_get_super(dev_t, bool excl); +void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); /* diff --git a/fs/io-wq.c b/fs/io-wq.c index b53c055bea6a..f72d53848dcb 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -1078,16 +1078,6 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, return IO_WQ_CANCEL_NOTFOUND; } -static bool io_wq_io_cb_cancel_data(struct io_wq_work *work, void *data) -{ - return work == data; -} - -enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork) -{ - return io_wq_cancel_cb(wq, io_wq_io_cb_cancel_data, (void *)cwork, false); -} - struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) { int ret = -ENOMEM, node; diff --git a/fs/io-wq.h b/fs/io-wq.h index cba36f03c355..069496c6d4f9 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -129,7 +129,6 @@ static inline bool io_wq_is_hashed(struct io_wq_work *work) } void io_wq_cancel_all(struct io_wq *wq); -enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork); typedef bool (work_cancel_fn)(struct io_wq_work *, void *); diff --git a/fs/io_uring.c b/fs/io_uring.c index 2b588bd5494c..6f9392c35eef 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -245,6 +245,8 @@ struct io_sq_data { struct task_struct *thread; struct wait_queue_head wait; + + unsigned sq_thread_idle; }; struct io_ring_ctx { @@ -285,7 +287,6 @@ struct io_ring_ctx { struct list_head timeout_list; struct list_head cq_overflow_list; - wait_queue_head_t inflight_wait; struct io_uring_sqe *sq_sqes; } ____cacheline_aligned_in_smp; @@ -310,7 +311,6 @@ struct io_ring_ctx { struct io_sq_data *sq_data; /* if using sq thread polling */ struct wait_queue_head sqo_sq_wait; - struct wait_queue_entry sqo_wait_entry; struct list_head sqd_list; /* @@ -395,16 +395,18 @@ struct io_ring_ctx { */ struct io_poll_iocb { struct file *file; - union { - struct wait_queue_head *head; - u64 addr; - }; + struct wait_queue_head *head; __poll_t events; bool done; bool canceled; struct wait_queue_entry wait; }; +struct io_poll_remove { + struct file *file; + u64 addr; +}; + struct io_close { struct file *file; struct file *put_file; @@ -444,11 +446,17 @@ struct io_timeout { u32 off; u32 target_seq; struct list_head list; + /* head of the link, used by linked timeouts only */ + struct io_kiocb *head; }; struct io_timeout_rem { struct file *file; u64 addr; + + /* timeout update */ + struct timespec64 ts; + u32 flags; }; struct io_rw { @@ -541,6 +549,27 @@ struct io_statx { struct statx __user *buffer; }; +struct io_shutdown { + struct file *file; + int how; +}; + +struct io_rename { + struct file *file; + int old_dfd; + int new_dfd; + struct filename *oldpath; + struct filename *newpath; + int flags; +}; + +struct io_unlink { + struct file *file; + int dfd; + int flags; + struct filename *filename; +}; + struct io_completion { struct file *file; struct list_head list; @@ -575,7 +604,6 @@ enum { REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, - REQ_F_LINK_HEAD_BIT, REQ_F_FAIL_LINK_BIT, REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, @@ -607,8 +635,6 @@ enum { /* IOSQE_BUFFER_SELECT */ REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), - /* head of a link */ - REQ_F_LINK_HEAD = BIT(REQ_F_LINK_HEAD_BIT), /* fail rest of links */ REQ_F_FAIL_LINK = BIT(REQ_F_FAIL_LINK_BIT), /* on inflight list */ @@ -651,6 +677,7 @@ struct io_kiocb { struct file *file; struct io_rw rw; struct io_poll_iocb poll; + struct io_poll_remove poll_remove; struct io_accept accept; struct io_sync sync; struct io_cancel cancel; @@ -667,6 +694,9 @@ struct io_kiocb { struct io_splice splice; struct io_provide_buf pbuf; struct io_statx statx; + struct io_shutdown shutdown; + struct io_rename rename; + struct io_unlink unlink; /* use only after cleaning per-op data, see io_clean_op() */ struct io_completion compl; }; @@ -686,15 +716,14 @@ struct io_kiocb { struct task_struct *task; u64 user_data; - struct list_head link_list; + struct io_kiocb *link; + struct percpu_ref *fixed_file_refs; /* * 1. used with ctx->iopoll_list with reads/writes * 2. to track reqs with ->files (see io_op_def::file_table) */ struct list_head inflight_entry; - - struct percpu_ref *fixed_file_refs; struct callback_head task_work; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; @@ -725,6 +754,8 @@ struct io_submit_state { void *reqs[IO_IOPOLL_BATCH]; unsigned int free_reqs; + bool plug_started; + /* * Batch completion logic */ @@ -735,7 +766,7 @@ struct io_submit_state { */ struct file *file; unsigned int fd; - unsigned int has_refs; + unsigned int file_refs; unsigned int ios_left; }; @@ -757,6 +788,8 @@ struct io_op_def { unsigned buffer_select : 1; /* must always have async data allocated */ unsigned needs_async_data : 1; + /* should block plug */ + unsigned plug : 1; /* size of async data needed, if any */ unsigned short async_size; unsigned work_flags; @@ -770,6 +803,7 @@ static const struct io_op_def io_op_defs[] = { .pollin = 1, .buffer_select = 1, .needs_async_data = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, @@ -779,6 +813,7 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .needs_async_data = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE, @@ -791,6 +826,7 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollin = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM, }, @@ -799,6 +835,7 @@ static const struct io_op_def io_op_defs[] = { .hash_reg_file = 1, .unbound_nonreg_file = 1, .pollout = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE | IO_WQ_WORK_MM, @@ -818,8 +855,7 @@ static const struct io_op_def io_op_defs[] = { .pollout = 1, .needs_async_data = 1, .async_size = sizeof(struct io_async_msghdr), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_RECVMSG] = { .needs_file = 1, @@ -828,15 +864,17 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, .needs_async_data = 1, .async_size = sizeof(struct io_async_msghdr), - .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, [IORING_OP_TIMEOUT] = { .needs_async_data = 1, .async_size = sizeof(struct io_timeout_data), .work_flags = IO_WQ_WORK_MM, }, - [IORING_OP_TIMEOUT_REMOVE] = {}, + [IORING_OP_TIMEOUT_REMOVE] = { + /* used by timeout updates' prep() */ + .work_flags = IO_WQ_WORK_MM, + }, [IORING_OP_ACCEPT] = { .needs_file = 1, .unbound_nonreg_file = 1, @@ -863,7 +901,7 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_OPENAT] = { .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG | - IO_WQ_WORK_FS, + IO_WQ_WORK_FS | IO_WQ_WORK_MM, }, [IORING_OP_CLOSE] = { .needs_file = 1, @@ -882,6 +920,7 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollin = 1, .buffer_select = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG, }, @@ -889,6 +928,7 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .unbound_nonreg_file = 1, .pollout = 1, + .plug = 1, .async_size = sizeof(struct io_async_rw), .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE, @@ -915,7 +955,7 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_OPENAT2] = { .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS | - IO_WQ_WORK_BLKCG, + IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM, }, [IORING_OP_EPOLL_CTL] = { .unbound_nonreg_file = 1, @@ -934,6 +974,17 @@ static const struct io_op_def io_op_defs[] = { .hash_reg_file = 1, .unbound_nonreg_file = 1, }, + [IORING_OP_SHUTDOWN] = { + .needs_file = 1, + }, + [IORING_OP_RENAMEAT] = { + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES | + IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG, + }, + [IORING_OP_UNLINKAT] = { + .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES | + IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG, + }, }; enum io_mem_account { @@ -983,6 +1034,9 @@ struct sock *io_uring_get_socket(struct file *file) } EXPORT_SYMBOL(io_uring_get_socket); +#define io_for_each_link(pos, head) \ + for (pos = (head); pos; pos = pos->link) + static inline void io_clean_op(struct io_kiocb *req) { if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED | @@ -990,8 +1044,39 @@ static inline void io_clean_op(struct io_kiocb *req) __io_clean_op(req); } -static void io_sq_thread_drop_mm(void) +static inline void io_set_resource_node(struct io_kiocb *req) { + struct io_ring_ctx *ctx = req->ctx; + + if (!req->fixed_file_refs) { + req->fixed_file_refs = &ctx->file_data->node->refs; + percpu_ref_get(req->fixed_file_refs); + } +} + +static bool io_match_task(struct io_kiocb *head, + struct task_struct *task, + struct files_struct *files) +{ + struct io_kiocb *req; + + if (task && head->task != task) + return false; + if (!files) + return true; + + io_for_each_link(req, head) { + if ((req->flags & REQ_F_WORK_INITIALIZED) && + (req->work.flags & IO_WQ_WORK_FILES) && + req->work.identity->files == files) + return true; + } + return false; +} + +static void io_sq_thread_drop_mm_files(void) +{ + struct files_struct *files = current->files; struct mm_struct *mm = current->mm; if (mm) { @@ -999,6 +1084,41 @@ static void io_sq_thread_drop_mm(void) mmput(mm); current->mm = NULL; } + if (files) { + struct nsproxy *nsproxy = current->nsproxy; + + task_lock(current); + current->files = NULL; + current->nsproxy = NULL; + task_unlock(current); + put_files_struct(files); + put_nsproxy(nsproxy); + } +} + +static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx) +{ + if (!current->files) { + struct files_struct *files; + struct nsproxy *nsproxy; + + task_lock(ctx->sqo_task); + files = ctx->sqo_task->files; + if (!files) { + task_unlock(ctx->sqo_task); + return -EOWNERDEAD; + } + atomic_inc(&files->count); + get_nsproxy(ctx->sqo_task->nsproxy); + nsproxy = ctx->sqo_task->nsproxy; + task_unlock(ctx->sqo_task); + + task_lock(current); + current->files = files; + current->nsproxy = nsproxy; + task_unlock(current); + } + return 0; } static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) @@ -1026,12 +1146,25 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx) return -EFAULT; } -static int io_sq_thread_acquire_mm(struct io_ring_ctx *ctx, - struct io_kiocb *req) +static int io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx, + struct io_kiocb *req) { - if (!(io_op_defs[req->opcode].work_flags & IO_WQ_WORK_MM)) - return 0; - return __io_sq_thread_acquire_mm(ctx); + const struct io_op_def *def = &io_op_defs[req->opcode]; + int ret; + + if (def->work_flags & IO_WQ_WORK_MM) { + ret = __io_sq_thread_acquire_mm(ctx); + if (unlikely(ret)) + return ret; + } + + if (def->needs_file || (def->work_flags & IO_WQ_WORK_FILES)) { + ret = __io_sq_thread_acquire_files(ctx); + if (unlikely(ret)) + return ret; + } + + return 0; } static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx, @@ -1174,7 +1307,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_LIST_HEAD(&ctx->iopoll_list); INIT_LIST_HEAD(&ctx->defer_list); INIT_LIST_HEAD(&ctx->timeout_list); - init_waitqueue_head(&ctx->inflight_wait); spin_lock_init(&ctx->inflight_lock); INIT_LIST_HEAD(&ctx->inflight_list); INIT_DELAYED_WORK(&ctx->file_put_work, io_file_put_work); @@ -1416,10 +1548,8 @@ static void io_prep_async_link(struct io_kiocb *req) { struct io_kiocb *cur; - io_prep_async_work(req); - if (req->flags & REQ_F_LINK_HEAD) - list_for_each_entry(cur, &req->link_list, link_list) - io_prep_async_work(cur); + io_for_each_link(cur, req) + io_prep_async_work(cur); } static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req) @@ -1460,30 +1590,18 @@ static void io_kill_timeout(struct io_kiocb *req) } } -static bool io_task_match(struct io_kiocb *req, struct task_struct *tsk) -{ - struct io_ring_ctx *ctx = req->ctx; - - if (!tsk || req->task == tsk) - return true; - if (ctx->flags & IORING_SETUP_SQPOLL) { - if (ctx->sq_data && req->task == ctx->sq_data->thread) - return true; - } - return false; -} - /* * Returns true if we found and killed one or more timeouts */ -static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk) +static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, + struct files_struct *files) { struct io_kiocb *req, *tmp; int canceled = 0; spin_lock_irq(&ctx->completion_lock); list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { - if (io_task_match(req, tsk)) { + if (io_match_task(req, tsk, files)) { io_kill_timeout(req); canceled++; } @@ -1594,32 +1712,6 @@ static void io_cqring_mark_overflow(struct io_ring_ctx *ctx) } } -static inline bool __io_match_files(struct io_kiocb *req, - struct files_struct *files) -{ - return ((req->flags & REQ_F_WORK_INITIALIZED) && - (req->work.flags & IO_WQ_WORK_FILES)) && - req->work.identity->files == files; -} - -static bool io_match_files(struct io_kiocb *req, - struct files_struct *files) -{ - struct io_kiocb *link; - - if (!files) - return true; - if (__io_match_files(req, files)) - return true; - if (req->flags & REQ_F_LINK_HEAD) { - list_for_each_entry(link, &req->link_list, link_list) { - if (__io_match_files(link, files)) - return true; - } - } - return false; -} - /* Returns true if there are no backlogged entries after the flush */ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, @@ -1647,9 +1739,7 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, cqe = NULL; list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) { - if (tsk && req->task != tsk) - continue; - if (!io_match_files(req, files)) + if (!io_match_task(req, tsk, files)) continue; cqe = io_get_cqring(ctx); @@ -1845,9 +1935,7 @@ static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx, static inline void io_put_file(struct io_kiocb *req, struct file *file, bool fixed) { - if (fixed) - percpu_ref_put(req->fixed_file_refs); - else + if (!fixed) fput(file); } @@ -1859,7 +1947,8 @@ static void io_dismantle_req(struct io_kiocb *req) kfree(req->async_data); if (req->file) io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE)); - + if (req->fixed_file_refs) + percpu_ref_put(req->fixed_file_refs); io_req_clean_work(req); } @@ -1882,6 +1971,14 @@ static void __io_free_req(struct io_kiocb *req) percpu_ref_put(&ctx->refs); } +static inline void io_remove_next_linked(struct io_kiocb *req) +{ + struct io_kiocb *nxt = req->link; + + req->link = nxt->link; + nxt->link = NULL; +} + static void io_kill_linked_timeout(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; @@ -1890,8 +1987,8 @@ static void io_kill_linked_timeout(struct io_kiocb *req) unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); - link = list_first_entry_or_null(&req->link_list, struct io_kiocb, - link_list); + link = req->link; + /* * Can happen if a linked timeout fired and link had been like * req -> link t-out -> link t-out [-> ...] @@ -1900,7 +1997,8 @@ static void io_kill_linked_timeout(struct io_kiocb *req) struct io_timeout_data *io = link->async_data; int ret; - list_del_init(&link->link_list); + io_remove_next_linked(req); + link->timeout.head = NULL; ret = hrtimer_try_to_cancel(&io->timer); if (ret != -1) { io_cqring_fill_event(link, -ECANCELED); @@ -1917,41 +2015,22 @@ static void io_kill_linked_timeout(struct io_kiocb *req) } } -static struct io_kiocb *io_req_link_next(struct io_kiocb *req) -{ - struct io_kiocb *nxt; - /* - * The list should never be empty when we are called here. But could - * potentially happen if the chain is messed up, check to be on the - * safe side. - */ - if (unlikely(list_empty(&req->link_list))) - return NULL; - - nxt = list_first_entry(&req->link_list, struct io_kiocb, link_list); - list_del_init(&req->link_list); - if (!list_empty(&nxt->link_list)) - nxt->flags |= REQ_F_LINK_HEAD; - return nxt; -} - -/* - * Called if REQ_F_LINK_HEAD is set, and we fail the head request - */ static void io_fail_links(struct io_kiocb *req) { + struct io_kiocb *link, *nxt; struct io_ring_ctx *ctx = req->ctx; unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); - while (!list_empty(&req->link_list)) { - struct io_kiocb *link = list_first_entry(&req->link_list, - struct io_kiocb, link_list); + link = req->link; + req->link = NULL; + + while (link) { + nxt = link->link; + link->link = NULL; - list_del_init(&link->link_list); trace_io_uring_fail_link(req, link); - io_cqring_fill_event(link, -ECANCELED); /* @@ -1963,8 +2042,8 @@ static void io_fail_links(struct io_kiocb *req) io_put_req_deferred(link, 2); else io_double_put_req(link); + link = nxt; } - io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); @@ -1973,7 +2052,6 @@ static void io_fail_links(struct io_kiocb *req) static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) { - req->flags &= ~REQ_F_LINK_HEAD; if (req->flags & REQ_F_LINK_TIMEOUT) io_kill_linked_timeout(req); @@ -1983,20 +2061,24 @@ static struct io_kiocb *__io_req_find_next(struct io_kiocb *req) * dependencies to the next request. In case of failure, fail the rest * of the chain. */ - if (likely(!(req->flags & REQ_F_FAIL_LINK))) - return io_req_link_next(req); + if (likely(!(req->flags & REQ_F_FAIL_LINK))) { + struct io_kiocb *nxt = req->link; + + req->link = NULL; + return nxt; + } io_fail_links(req); return NULL; } -static struct io_kiocb *io_req_find_next(struct io_kiocb *req) +static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) { - if (likely(!(req->flags & REQ_F_LINK_HEAD))) + if (likely(!(req->link) && !(req->flags & REQ_F_LINK_TIMEOUT))) return NULL; return __io_req_find_next(req); } -static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok) +static int io_req_task_work_add(struct io_kiocb *req) { struct task_struct *tsk = req->task; struct io_ring_ctx *ctx = req->ctx; @@ -2013,7 +2095,7 @@ static int io_req_task_work_add(struct io_kiocb *req, bool twa_signal_ok) * will do the job. */ notify = TWA_NONE; - if (!(ctx->flags & IORING_SETUP_SQPOLL) && twa_signal_ok) + if (!(ctx->flags & IORING_SETUP_SQPOLL)) notify = TWA_SIGNAL; ret = task_work_add(tsk, &req->task_work, notify); @@ -2050,7 +2132,8 @@ static void __io_req_task_submit(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - if (!__io_sq_thread_acquire_mm(ctx)) { + if (!__io_sq_thread_acquire_mm(ctx) && + !__io_sq_thread_acquire_files(ctx)) { mutex_lock(&ctx->uring_lock); __io_queue_sqe(req, NULL); mutex_unlock(&ctx->uring_lock); @@ -2075,7 +2158,7 @@ static void io_req_task_queue(struct io_kiocb *req) init_task_work(&req->task_work, io_req_task_submit); percpu_ref_get(&req->ctx->refs); - ret = io_req_task_work_add(req, true); + ret = io_req_task_work_add(req); if (unlikely(ret)) { struct task_struct *tsk; @@ -2086,7 +2169,7 @@ static void io_req_task_queue(struct io_kiocb *req) } } -static void io_queue_next(struct io_kiocb *req) +static inline void io_queue_next(struct io_kiocb *req) { struct io_kiocb *nxt = io_req_find_next(req); @@ -2143,8 +2226,7 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req) io_free_req(req); return; } - if (req->flags & REQ_F_LINK_HEAD) - io_queue_next(req); + io_queue_next(req); if (req->task != rb->task) { if (rb->task) { @@ -2197,7 +2279,7 @@ static void io_free_req_deferred(struct io_kiocb *req) int ret; init_task_work(&req->task_work, io_put_req_deferred_cb); - ret = io_req_task_work_add(req, true); + ret = io_req_task_work_add(req); if (unlikely(ret)) { struct task_struct *tsk; @@ -2246,7 +2328,7 @@ static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush) * we wake up the task, and the next invocation will flush the * entries. We cannot safely to it from here. */ - if (noflush && !list_empty(&ctx->cq_overflow_list)) + if (noflush) return -1U; io_cqring_overflow_flush(ctx, false, NULL, NULL); @@ -2593,7 +2675,7 @@ static bool io_rw_reissue(struct io_kiocb *req, long res) if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) return false; - ret = io_sq_thread_acquire_mm(req->ctx, req); + ret = io_sq_thread_acquire_mm_files(req->ctx, req); if (io_resubmit_prep(req, ret)) { refcount_inc(&req->refs); @@ -2641,7 +2723,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) * find it from a io_iopoll_getevents() thread before the issuer is done * accessing the kiocb cookie. */ -static void io_iopoll_req_issued(struct io_kiocb *req) +static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async) { struct io_ring_ctx *ctx = req->ctx; @@ -2670,21 +2752,25 @@ static void io_iopoll_req_issued(struct io_kiocb *req) else list_add_tail(&req->inflight_entry, &ctx->iopoll_list); - if ((ctx->flags & IORING_SETUP_SQPOLL) && + /* + * If IORING_SETUP_SQPOLL is enabled, sqes are either handled in sq thread + * task context or in io worker task context. If current task context is + * sq thread, we don't need to check whether should wake up sq thread. + */ + if (in_async && (ctx->flags & IORING_SETUP_SQPOLL) && wq_has_sleeper(&ctx->sq_data->wait)) wake_up(&ctx->sq_data->wait); } -static void __io_state_file_put(struct io_submit_state *state) +static inline void __io_state_file_put(struct io_submit_state *state) { - if (state->has_refs) - fput_many(state->file, state->has_refs); - state->file = NULL; + fput_many(state->file, state->file_refs); + state->file_refs = 0; } static inline void io_state_file_put(struct io_submit_state *state) { - if (state->file) + if (state->file_refs) __io_state_file_put(state); } @@ -2698,29 +2784,25 @@ static struct file *__io_file_get(struct io_submit_state *state, int fd) if (!state) return fget(fd); - if (state->file) { + if (state->file_refs) { if (state->fd == fd) { - state->has_refs--; + state->file_refs--; return state->file; } __io_state_file_put(state); } state->file = fget_many(fd, state->ios_left); - if (!state->file) + if (unlikely(!state->file)) return NULL; state->fd = fd; - state->has_refs = state->ios_left - 1; + state->file_refs = state->ios_left - 1; return state->file; } static bool io_bdev_nowait(struct block_device *bdev) { -#ifdef CONFIG_BLOCK return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); -#else - return true; -#endif } /* @@ -2733,14 +2815,16 @@ static bool io_file_supports_async(struct file *file, int rw) umode_t mode = file_inode(file)->i_mode; if (S_ISBLK(mode)) { - if (io_bdev_nowait(file->f_inode->i_bdev)) + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(I_BDEV(file->f_mapping->host))) return true; return false; } if (S_ISCHR(mode) || S_ISSOCK(mode)) return true; if (S_ISREG(mode)) { - if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) && + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(file->f_inode->i_sb->s_bdev) && file->f_op != &io_uring_fops) return true; return false; @@ -3065,7 +3149,7 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, return __io_iov_buffer_select(req, iov, needs_lock); } -static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, +static ssize_t io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, struct iov_iter *iter, bool needs_lock) { @@ -3094,7 +3178,7 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, ret = import_single_range(rw, buf, sqe_len, *iovec, iter); *iovec = NULL; - return ret < 0 ? ret : sqe_len; + return ret; } if (req->flags & REQ_F_BUFFER_SELECT) { @@ -3111,18 +3195,6 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req, req->ctx->compat); } -static ssize_t io_import_iovec(int rw, struct io_kiocb *req, - struct iovec **iovec, struct iov_iter *iter, - bool needs_lock) -{ - struct io_async_rw *iorw = req->async_data; - - if (!iorw) - return __io_import_iovec(rw, req, iovec, iter, needs_lock); - *iovec = NULL; - return iov_iter_count(&iorw->iter); -} - static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) { return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; @@ -3246,7 +3318,7 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw) struct iovec *iov = iorw->fast_iov; ssize_t ret; - ret = __io_import_iovec(rw, req, &iov, &iorw->iter, false); + ret = io_import_iovec(rw, req, &iov, &iorw->iter, false); if (unlikely(ret < 0)) return ret; @@ -3305,7 +3377,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, /* submit ref gets dropped, acquire a new one */ refcount_inc(&req->refs); - ret = io_req_task_work_add(req, true); + ret = io_req_task_work_add(req); if (unlikely(ret)) { struct task_struct *tsk; @@ -3379,17 +3451,17 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; ssize_t io_size, ret, ret2; - size_t iov_count; bool no_async; - if (rw) + if (rw) { iter = &rw->iter; - - ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); - if (ret < 0) - return ret; - iov_count = iov_iter_count(iter); - io_size = ret; + iovec = NULL; + } else { + ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); + if (ret < 0) + return ret; + } + io_size = iov_iter_count(iter); req->result = io_size; ret = 0; @@ -3405,7 +3477,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (no_async) goto copy_iov; - ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), iov_count); + ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size); if (unlikely(ret)) goto out_free; @@ -3424,7 +3496,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, if (req->file->f_flags & O_NONBLOCK) goto done; /* some cases will consume bytes even on error returns */ - iov_iter_revert(iter, iov_count - iov_iter_count(iter)); + iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = 0; goto copy_iov; } else if (ret < 0) { @@ -3507,17 +3579,17 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, struct kiocb *kiocb = &req->rw.kiocb; struct iov_iter __iter, *iter = &__iter; struct io_async_rw *rw = req->async_data; - size_t iov_count; ssize_t ret, ret2, io_size; - if (rw) + if (rw) { iter = &rw->iter; - - ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); - if (ret < 0) - return ret; - iov_count = iov_iter_count(iter); - io_size = ret; + iovec = NULL; + } else { + ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); + if (ret < 0) + return ret; + } + io_size = iov_iter_count(iter); req->result = io_size; /* Ensure we clear previously set non-block flag */ @@ -3535,7 +3607,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, (req->flags & REQ_F_ISREG)) goto copy_iov; - ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), iov_count); + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size); if (unlikely(ret)) goto out_free; @@ -3578,7 +3650,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, } else { copy_iov: /* some cases will consume bytes even on error returns */ - iov_iter_revert(iter, iov_count - iov_iter_count(iter)); + iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); if (!ret) return -EAGAIN; @@ -3590,6 +3662,135 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, return ret; } +static int io_renameat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_rename *ren = &req->rename; + const char __user *oldf, *newf; + + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + ren->old_dfd = READ_ONCE(sqe->fd); + oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); + newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + ren->new_dfd = READ_ONCE(sqe->len); + ren->flags = READ_ONCE(sqe->rename_flags); + + ren->oldpath = getname(oldf); + if (IS_ERR(ren->oldpath)) + return PTR_ERR(ren->oldpath); + + ren->newpath = getname(newf); + if (IS_ERR(ren->newpath)) { + putname(ren->oldpath); + return PTR_ERR(ren->newpath); + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_renameat(struct io_kiocb *req, bool force_nonblock) +{ + struct io_rename *ren = &req->rename; + int ret; + + if (force_nonblock) + return -EAGAIN; + + ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, + ren->newpath, ren->flags); + + req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret < 0) + req_set_fail_links(req); + io_req_complete(req, ret); + return 0; +} + +static int io_unlinkat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_unlink *un = &req->unlink; + const char __user *fname; + + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + un->dfd = READ_ONCE(sqe->fd); + + un->flags = READ_ONCE(sqe->unlink_flags); + if (un->flags & ~AT_REMOVEDIR) + return -EINVAL; + + fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + un->filename = getname(fname); + if (IS_ERR(un->filename)) + return PTR_ERR(un->filename); + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_unlinkat(struct io_kiocb *req, bool force_nonblock) +{ + struct io_unlink *un = &req->unlink; + int ret; + + if (force_nonblock) + return -EAGAIN; + + if (un->flags & AT_REMOVEDIR) + ret = do_rmdir(un->dfd, un->filename); + else + ret = do_unlinkat(un->dfd, un->filename); + + req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret < 0) + req_set_fail_links(req); + io_req_complete(req, ret); + return 0; +} + +static int io_shutdown_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ +#if defined(CONFIG_NET) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags || + sqe->buf_index) + return -EINVAL; + + req->shutdown.how = READ_ONCE(sqe->len); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int io_shutdown(struct io_kiocb *req, bool force_nonblock) +{ +#if defined(CONFIG_NET) + struct socket *sock; + int ret; + + if (force_nonblock) + return -EAGAIN; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + ret = __sys_shutdown_sock(sock, req->shutdown.how); + io_req_complete(req, ret); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + static int __io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -3804,7 +4005,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { u64 flags, mode; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; mode = READ_ONCE(sqe->len); flags = READ_ONCE(sqe->open_flags); @@ -3818,7 +4019,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) size_t len; int ret; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); len = READ_ONCE(sqe->len); @@ -3948,11 +4149,17 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock, head = idr_find(&ctx->io_buffer_idr, p->bgid); if (head) ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); - - io_ring_submit_lock(ctx, !force_nonblock); if (ret < 0) req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + + /* need to hold the lock to complete IOPOLL requests */ + if (ctx->flags & IORING_SETUP_IOPOLL) { + __io_req_complete(req, ret, 0, cs); + io_ring_submit_unlock(ctx, !force_nonblock); + } else { + io_ring_submit_unlock(ctx, !force_nonblock); + __io_req_complete(req, ret, 0, cs); + } return 0; } @@ -4037,10 +4244,17 @@ static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock, } } out: - io_ring_submit_unlock(ctx, !force_nonblock); if (ret < 0) req_set_fail_links(req); - __io_req_complete(req, ret, 0, cs); + + /* need to hold the lock to complete IOPOLL requests */ + if (ctx->flags & IORING_SETUP_IOPOLL) { + __io_req_complete(req, ret, 0, cs); + io_ring_submit_unlock(ctx, !force_nonblock); + } else { + io_ring_submit_unlock(ctx, !force_nonblock); + __io_req_complete(req, ret, 0, cs); + } return 0; } @@ -4212,7 +4426,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) io_req_init_async(req); req->work.flags |= IO_WQ_WORK_NO_CANCEL; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) @@ -4694,7 +4908,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_accept *accept = &req->accept; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->len || sqe->buf_index) return -EINVAL; @@ -4735,7 +4949,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_connect *conn = &req->connect; struct io_async_connect *io = req->async_data; - if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL))) + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags) return -EINVAL; @@ -4859,7 +5073,6 @@ struct io_poll_table { static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, __poll_t mask, task_work_func_t func) { - bool twa_signal_ok; int ret; /* for instances that support it check for an event match first: */ @@ -4874,21 +5087,13 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, init_task_work(&req->task_work, func); percpu_ref_get(&req->ctx->refs); - /* - * If we using the signalfd wait_queue_head for this wakeup, then - * it's not safe to use TWA_SIGNAL as we could be recursing on the - * tsk->sighand->siglock on doing the wakeup. Should not be needed - * either, as the normal wakeup will suffice. - */ - twa_signal_ok = (poll->head != &req->task->sighand->signalfd_wqh); - /* * If this fails, then the task is exiting. When a task exits, the * work gets canceled, so just cancel this request as well instead * of executing it. We can't safely execute it anyway, as we may not * have the needed state needed for it anyway. */ - ret = io_req_task_work_add(req, twa_signal_ok); + ret = io_req_task_work_add(req); if (unlikely(ret)) { struct task_struct *tsk; @@ -5279,7 +5484,8 @@ static bool io_poll_remove_one(struct io_kiocb *req) /* * Returns true if we found and killed one or more poll requests */ -static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk) +static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, + struct files_struct *files) { struct hlist_node *tmp; struct io_kiocb *req; @@ -5291,7 +5497,7 @@ static bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk) list = &ctx->cancel_hash[i]; hlist_for_each_entry_safe(req, tmp, list, hash_node) { - if (io_task_match(req, tsk)) + if (io_match_task(req, tsk, files)) posted += io_poll_remove_one(req); } } @@ -5329,7 +5535,7 @@ static int io_poll_remove_prep(struct io_kiocb *req, sqe->poll_events) return -EINVAL; - req->poll.addr = READ_ONCE(sqe->addr); + req->poll_remove.addr = READ_ONCE(sqe->addr); return 0; } @@ -5340,12 +5546,10 @@ static int io_poll_remove_prep(struct io_kiocb *req, static int io_poll_remove(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - u64 addr; int ret; - addr = req->poll.addr; spin_lock_irq(&ctx->completion_lock); - ret = io_poll_cancel(ctx, addr); + ret = io_poll_cancel(ctx, req->poll_remove.addr); spin_unlock_irq(&ctx->completion_lock); if (ret < 0) @@ -5438,24 +5642,10 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static int __io_timeout_cancel(struct io_kiocb *req) -{ - struct io_timeout_data *io = req->async_data; - int ret; - - ret = hrtimer_try_to_cancel(&io->timer); - if (ret == -1) - return -EALREADY; - list_del_init(&req->timeout.list); - - req_set_fail_links(req); - io_cqring_fill_event(req, -ECANCELED); - io_put_req_deferred(req, 1); - return 0; -} - -static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) +static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, + __u64 user_data) { + struct io_timeout_data *io; struct io_kiocb *req; int ret = -ENOENT; @@ -5467,22 +5657,71 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) } if (ret == -ENOENT) - return ret; + return ERR_PTR(ret); - return __io_timeout_cancel(req); + io = req->async_data; + ret = hrtimer_try_to_cancel(&io->timer); + if (ret == -1) + return ERR_PTR(-EALREADY); + list_del_init(&req->timeout.list); + return req; +} + +static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) +{ + struct io_kiocb *req = io_timeout_extract(ctx, user_data); + + if (IS_ERR(req)) + return PTR_ERR(req); + + req_set_fail_links(req); + io_cqring_fill_event(req, -ECANCELED); + io_put_req_deferred(req, 1); + return 0; +} + +static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) +{ + struct io_kiocb *req = io_timeout_extract(ctx, user_data); + struct io_timeout_data *data; + + if (IS_ERR(req)) + return PTR_ERR(req); + + req->timeout.off = 0; /* noseq */ + data = req->async_data; + list_add_tail(&req->timeout.list, &ctx->timeout_list); + hrtimer_init(&data->timer, CLOCK_MONOTONIC, mode); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); + return 0; } static int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + struct io_timeout_rem *tr = &req->timeout_rem; + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; - if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags) + if (sqe->ioprio || sqe->buf_index || sqe->len) return -EINVAL; - req->timeout_rem.addr = READ_ONCE(sqe->addr); + tr->addr = READ_ONCE(sqe->addr); + tr->flags = READ_ONCE(sqe->timeout_flags); + if (tr->flags & IORING_TIMEOUT_UPDATE) { + if (tr->flags & ~(IORING_TIMEOUT_UPDATE|IORING_TIMEOUT_ABS)) + return -EINVAL; + if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) + return -EFAULT; + } else if (tr->flags) { + /* timeout removal doesn't support flags */ + return -EINVAL; + } + return 0; } @@ -5491,11 +5730,19 @@ static int io_timeout_remove_prep(struct io_kiocb *req, */ static int io_timeout_remove(struct io_kiocb *req) { + struct io_timeout_rem *tr = &req->timeout_rem; struct io_ring_ctx *ctx = req->ctx; int ret; spin_lock_irq(&ctx->completion_lock); - ret = io_timeout_cancel(ctx, req->timeout_rem.addr); + if (req->timeout_rem.flags & IORING_TIMEOUT_UPDATE) { + enum hrtimer_mode mode = (tr->flags & IORING_TIMEOUT_ABS) + ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; + + ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); + } else { + ret = io_timeout_cancel(ctx, tr->addr); + } io_cqring_fill_event(req, ret); io_commit_cqring(ctx); @@ -5775,6 +6022,12 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return io_remove_buffers_prep(req, sqe); case IORING_OP_TEE: return io_tee_prep(req, sqe); + case IORING_OP_SHUTDOWN: + return io_shutdown_prep(req, sqe); + case IORING_OP_RENAMEAT: + return io_renameat_prep(req, sqe); + case IORING_OP_UNLINKAT: + return io_unlinkat_prep(req, sqe); } printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", @@ -5796,11 +6049,10 @@ static u32 io_get_sequence(struct io_kiocb *req) { struct io_kiocb *pos; struct io_ring_ctx *ctx = req->ctx; - u32 total_submitted, nr_reqs = 1; + u32 total_submitted, nr_reqs = 0; - if (req->flags & REQ_F_LINK_HEAD) - list_for_each_entry(pos, &req->link_list, link_list) - nr_reqs++; + io_for_each_link(pos, req) + nr_reqs++; total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped; return total_submitted - nr_reqs; @@ -5852,12 +6104,13 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe) static void io_req_drop_files(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; + struct io_uring_task *tctx = req->task->io_uring; unsigned long flags; spin_lock_irqsave(&ctx->inflight_lock, flags); list_del(&req->inflight_entry); - if (waitqueue_active(&ctx->inflight_wait)) - wake_up(&ctx->inflight_wait); + if (atomic_read(&tctx->in_idle)) + wake_up(&tctx->wait); spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; put_files_struct(req->work.identity->files); @@ -5912,6 +6165,13 @@ static void __io_clean_op(struct io_kiocb *req) if (req->open.filename) putname(req->open.filename); break; + case IORING_OP_RENAMEAT: + putname(req->rename.oldpath); + putname(req->rename.newpath); + break; + case IORING_OP_UNLINKAT: + putname(req->unlink.filename); + break; } req->flags &= ~REQ_F_NEED_CLEANUP; } @@ -6018,6 +6278,15 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock, case IORING_OP_TEE: ret = io_tee(req, force_nonblock); break; + case IORING_OP_SHUTDOWN: + ret = io_shutdown(req, force_nonblock); + break; + case IORING_OP_RENAMEAT: + ret = io_renameat(req, force_nonblock); + break; + case IORING_OP_UNLINKAT: + ret = io_unlinkat(req, force_nonblock); + break; default: ret = -EINVAL; break; @@ -6034,7 +6303,7 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock, if (in_async) mutex_lock(&ctx->uring_lock); - io_iopoll_req_issued(req); + io_iopoll_req_issued(req, in_async); if (in_async) mutex_unlock(&ctx->uring_lock); @@ -6074,8 +6343,19 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) } if (ret) { - req_set_fail_links(req); - io_req_complete(req, ret); + /* + * io_iopoll_complete() does not hold completion_lock to complete + * polled io, so here for polled io, just mark it done and still let + * io_iopoll_complete() complete it. + */ + if (req->ctx->flags & IORING_SETUP_IOPOLL) { + struct kiocb *kiocb = &req->rw.kiocb; + + kiocb_done(kiocb, ret, NULL); + } else { + req_set_fail_links(req); + io_req_complete(req, ret); + } } return io_steal_work(req); @@ -6101,10 +6381,7 @@ static struct file *io_file_get(struct io_submit_state *state, return NULL; fd = array_index_nospec(fd, ctx->nr_user_files); file = io_file_from_index(ctx, fd); - if (file) { - req->fixed_file_refs = &ctx->file_data->node->refs; - percpu_ref_get(req->fixed_file_refs); - } + io_set_resource_node(req); } else { trace_io_uring_file_get(ctx, fd); file = __io_file_get(state, fd); @@ -6113,45 +6390,26 @@ static struct file *io_file_get(struct io_submit_state *state, return file; } -static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - int fd) -{ - bool fixed; - - fixed = (req->flags & REQ_F_FIXED_FILE) != 0; - if (unlikely(!fixed && io_async_submit(req->ctx))) - return -EBADF; - - req->file = io_file_get(state, req, fd, fixed); - if (req->file || io_op_defs[req->opcode].needs_file_no_error) - return 0; - return -EBADF; -} - static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) { struct io_timeout_data *data = container_of(timer, struct io_timeout_data, timer); - struct io_kiocb *req = data->req; + struct io_kiocb *prev, *req = data->req; struct io_ring_ctx *ctx = req->ctx; - struct io_kiocb *prev = NULL; unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); + prev = req->timeout.head; + req->timeout.head = NULL; /* * We don't expect the list to be empty, that will only happen if we * race with the completion of the linked work. */ - if (!list_empty(&req->link_list)) { - prev = list_entry(req->link_list.prev, struct io_kiocb, - link_list); - if (refcount_inc_not_zero(&prev->refs)) - list_del_init(&req->link_list); - else - prev = NULL; - } - + if (prev && refcount_inc_not_zero(&prev->refs)) + io_remove_next_linked(prev); + else + prev = NULL; spin_unlock_irqrestore(&ctx->completion_lock, flags); if (prev) { @@ -6167,10 +6425,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) static void __io_queue_linked_timeout(struct io_kiocb *req) { /* - * If the list is now empty, then our linked request finished before - * we got a chance to setup the timer + * If the back reference is NULL, then our linked request finished + * before we got a chance to setup the timer */ - if (!list_empty(&req->link_list)) { + if (req->timeout.head) { struct io_timeout_data *data = req->async_data; data->timer.function = io_link_timeout_fn; @@ -6193,18 +6451,13 @@ static void io_queue_linked_timeout(struct io_kiocb *req) static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) { - struct io_kiocb *nxt; + struct io_kiocb *nxt = req->link; - if (!(req->flags & REQ_F_LINK_HEAD)) - return NULL; - if (req->flags & REQ_F_LINK_TIMEOUT) - return NULL; - - nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, - link_list); - if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT) + if (!nxt || (req->flags & REQ_F_LINK_TIMEOUT) || + nxt->opcode != IORING_OP_LINK_TIMEOUT) return NULL; + nxt->timeout.head = req; nxt->flags |= REQ_F_LTIMEOUT_ACTIVE; req->flags |= REQ_F_LINK_TIMEOUT; return nxt; @@ -6310,8 +6563,13 @@ static inline void io_queue_link_head(struct io_kiocb *req, io_queue_sqe(req, NULL, cs); } +struct io_submit_link { + struct io_kiocb *head; + struct io_kiocb *last; +}; + static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, - struct io_kiocb **link, struct io_comp_state *cs) + struct io_submit_link *link, struct io_comp_state *cs) { struct io_ring_ctx *ctx = req->ctx; int ret; @@ -6323,8 +6581,8 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, * submitted sync once the chain is complete. If none of those * conditions are true (normal request), then just queue it. */ - if (*link) { - struct io_kiocb *head = *link; + if (link->head) { + struct io_kiocb *head = link->head; /* * Taking sequential execution of a link, draining both sides @@ -6344,12 +6602,13 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, return ret; } trace_io_uring_link(ctx, req, head); - list_add_tail(&req->link_list, &head->link_list); + link->last->link = req; + link->last = req; /* last request of a link, enqueue the link */ if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { io_queue_link_head(head, cs); - *link = NULL; + link->head = NULL; } } else { if (unlikely(ctx->drain_next)) { @@ -6357,13 +6616,11 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, ctx->drain_next = 0; } if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { - req->flags |= REQ_F_LINK_HEAD; - INIT_LIST_HEAD(&req->link_list); - ret = io_req_defer_prep(req, sqe); if (unlikely(ret)) req->flags |= REQ_F_FAIL_LINK; - *link = req; + link->head = req; + link->last = req; } else { io_queue_sqe(req, sqe, cs); } @@ -6379,7 +6636,8 @@ static void io_submit_state_end(struct io_submit_state *state) { if (!list_empty(&state->comp.list)) io_submit_flush_completions(&state->comp); - blk_finish_plug(&state->plug); + if (state->plug_started) + blk_finish_plug(&state->plug); io_state_file_put(state); if (state->free_reqs) kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); @@ -6391,12 +6649,12 @@ static void io_submit_state_end(struct io_submit_state *state) static void io_submit_state_start(struct io_submit_state *state, struct io_ring_ctx *ctx, unsigned int max_ios) { - blk_start_plug(&state->plug); + state->plug_started = false; state->comp.nr = 0; INIT_LIST_HEAD(&state->comp.list); state->comp.ctx = ctx; state->free_reqs = 0; - state->file = NULL; + state->file_refs = 0; state->ios_left = max_ios; } @@ -6491,6 +6749,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->file = NULL; req->ctx = ctx; req->flags = 0; + req->link = NULL; + req->fixed_file_refs = NULL; /* one is dropped after submission, the other at completion */ refcount_set(&req->refs, 2); req->task = current; @@ -6499,7 +6759,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (unlikely(req->opcode >= IORING_OP_LAST)) return -EINVAL; - if (unlikely(io_sq_thread_acquire_mm(ctx, req))) + if (unlikely(io_sq_thread_acquire_mm_files(ctx, req))) return -EFAULT; sqe_flags = READ_ONCE(sqe->flags); @@ -6532,10 +6792,26 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, /* same numerical values with corresponding REQ_F_*, safe to copy */ req->flags |= sqe_flags; - if (!io_op_defs[req->opcode].needs_file) - return 0; + /* + * Plug now if we have more than 1 IO left after this, and the target + * is potentially a read/write to block based storage. + */ + if (!state->plug_started && state->ios_left > 1 && + io_op_defs[req->opcode].plug) { + blk_start_plug(&state->plug); + state->plug_started = true; + } + + ret = 0; + if (io_op_defs[req->opcode].needs_file) { + bool fixed = req->flags & REQ_F_FIXED_FILE; + + req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed); + if (unlikely(!req->file && + !io_op_defs[req->opcode].needs_file_no_error)) + ret = -EBADF; + } - ret = io_req_set_file(state, req, READ_ONCE(sqe->fd)); state->ios_left--; return ret; } @@ -6543,7 +6819,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) { struct io_submit_state state; - struct io_kiocb *link = NULL; + struct io_submit_link link; int i, submitted = 0; /* if we have a backlog and couldn't flush it all, return BUSY */ @@ -6563,6 +6839,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) refcount_add(nr, ¤t->usage); io_submit_state_start(&state, ctx, nr); + link.head = NULL; for (i = 0; i < nr; i++) { const struct io_uring_sqe *sqe; @@ -6608,8 +6885,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) percpu_counter_sub(&tctx->inflight, unused); put_task_struct_many(current, unused); } - if (link) - io_queue_link_head(link, &state.comp); + if (link.head) + io_queue_link_head(link.head, &state.comp); io_submit_state_end(&state); /* Commit SQ ring head once we've consumed and submitted all SQEs */ @@ -6633,111 +6910,45 @@ static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx) spin_unlock_irq(&ctx->completion_lock); } -static int io_sq_wake_function(struct wait_queue_entry *wqe, unsigned mode, - int sync, void *key) +static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) { - struct io_ring_ctx *ctx = container_of(wqe, struct io_ring_ctx, sqo_wait_entry); - int ret; - - ret = autoremove_wake_function(wqe, mode, sync, key); - if (ret) { - unsigned long flags; - - spin_lock_irqsave(&ctx->completion_lock, flags); - ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; - spin_unlock_irqrestore(&ctx->completion_lock, flags); - } - return ret; -} - -enum sq_ret { - SQT_IDLE = 1, - SQT_SPIN = 2, - SQT_DID_WORK = 4, -}; - -static enum sq_ret __io_sq_thread(struct io_ring_ctx *ctx, - unsigned long start_jiffies, bool cap_entries) -{ - unsigned long timeout = start_jiffies + ctx->sq_thread_idle; - struct io_sq_data *sqd = ctx->sq_data; unsigned int to_submit; int ret = 0; -again: - if (!list_empty(&ctx->iopoll_list)) { - unsigned nr_events = 0; - - mutex_lock(&ctx->uring_lock); - if (!list_empty(&ctx->iopoll_list) && !need_resched()) - io_do_iopoll(ctx, &nr_events, 0); - mutex_unlock(&ctx->uring_lock); - } - to_submit = io_sqring_entries(ctx); - - /* - * If submit got -EBUSY, flag us as needing the application - * to enter the kernel to reap and flush events. - */ - if (!to_submit || ret == -EBUSY || need_resched()) { - /* - * Drop cur_mm before scheduling, we can't hold it for - * long periods (or over schedule()). Do this before - * adding ourselves to the waitqueue, as the unuse/drop - * may sleep. - */ - io_sq_thread_drop_mm(); - - /* - * We're polling. If we're within the defined idle - * period, then let us spin without work before going - * to sleep. The exception is if we got EBUSY doing - * more IO, we should wait for the application to - * reap events and wake us up. - */ - if (!list_empty(&ctx->iopoll_list) || need_resched() || - (!time_after(jiffies, timeout) && ret != -EBUSY && - !percpu_ref_is_dying(&ctx->refs))) - return SQT_SPIN; - - prepare_to_wait(&sqd->wait, &ctx->sqo_wait_entry, - TASK_INTERRUPTIBLE); - - /* - * While doing polled IO, before going to sleep, we need - * to check if there are new reqs added to iopoll_list, - * it is because reqs may have been punted to io worker - * and will be added to iopoll_list later, hence check - * the iopoll_list again. - */ - if ((ctx->flags & IORING_SETUP_IOPOLL) && - !list_empty_careful(&ctx->iopoll_list)) { - finish_wait(&sqd->wait, &ctx->sqo_wait_entry); - goto again; - } - - to_submit = io_sqring_entries(ctx); - if (!to_submit || ret == -EBUSY) - return SQT_IDLE; - } - - finish_wait(&sqd->wait, &ctx->sqo_wait_entry); - io_ring_clear_wakeup_flag(ctx); - /* if we're handling multiple rings, cap submit size for fairness */ if (cap_entries && to_submit > 8) to_submit = 8; - mutex_lock(&ctx->uring_lock); - if (likely(!percpu_ref_is_dying(&ctx->refs))) - ret = io_submit_sqes(ctx, to_submit); - mutex_unlock(&ctx->uring_lock); + if (!list_empty(&ctx->iopoll_list) || to_submit) { + unsigned nr_events = 0; + + mutex_lock(&ctx->uring_lock); + if (!list_empty(&ctx->iopoll_list)) + io_do_iopoll(ctx, &nr_events, 0); + + if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs))) + ret = io_submit_sqes(ctx, to_submit); + mutex_unlock(&ctx->uring_lock); + } if (!io_sqring_full(ctx) && wq_has_sleeper(&ctx->sqo_sq_wait)) wake_up(&ctx->sqo_sq_wait); - return SQT_DID_WORK; + return ret; +} + +static void io_sqd_update_thread_idle(struct io_sq_data *sqd) +{ + struct io_ring_ctx *ctx; + unsigned sq_thread_idle = 0; + + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + if (sq_thread_idle < ctx->sq_thread_idle) + sq_thread_idle = ctx->sq_thread_idle; + } + + sqd->sq_thread_idle = sq_thread_idle; } static void io_sqd_init_new(struct io_sq_data *sqd) @@ -6746,39 +6957,56 @@ static void io_sqd_init_new(struct io_sq_data *sqd) while (!list_empty(&sqd->ctx_new_list)) { ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list); - init_wait(&ctx->sqo_wait_entry); - ctx->sqo_wait_entry.func = io_sq_wake_function; list_move_tail(&ctx->sqd_list, &sqd->ctx_list); complete(&ctx->sq_thread_comp); } + + io_sqd_update_thread_idle(sqd); } static int io_sq_thread(void *data) { struct cgroup_subsys_state *cur_css = NULL; + struct files_struct *old_files = current->files; + struct nsproxy *old_nsproxy = current->nsproxy; const struct cred *old_cred = NULL; struct io_sq_data *sqd = data; struct io_ring_ctx *ctx; - unsigned long start_jiffies; + unsigned long timeout = 0; + DEFINE_WAIT(wait); + + task_lock(current); + current->files = NULL; + current->nsproxy = NULL; + task_unlock(current); - start_jiffies = jiffies; while (!kthread_should_stop()) { - enum sq_ret ret = 0; - bool cap_entries; + int ret; + bool cap_entries, sqt_spin, needs_sched; /* * Any changes to the sqd lists are synchronized through the * kthread parking. This synchronizes the thread vs users, * the users are synchronized on the sqd->ctx_lock. */ - if (kthread_should_park()) + if (kthread_should_park()) { kthread_parkme(); + /* + * When sq thread is unparked, in case the previous park operation + * comes from io_put_sq_data(), which means that sq thread is going + * to be stopped, so here needs to have a check. + */ + if (kthread_should_stop()) + break; + } - if (unlikely(!list_empty(&sqd->ctx_new_list))) + if (unlikely(!list_empty(&sqd->ctx_new_list))) { io_sqd_init_new(sqd); + timeout = jiffies + sqd->sq_thread_idle; + } + sqt_spin = false; cap_entries = !list_is_singular(&sqd->ctx_list); - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { if (current->cred != ctx->creds) { if (old_cred) @@ -6791,24 +7019,49 @@ static int io_sq_thread(void *data) current->sessionid = ctx->sessionid; #endif - ret |= __io_sq_thread(ctx, start_jiffies, cap_entries); + ret = __io_sq_thread(ctx, cap_entries); + if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list))) + sqt_spin = true; - io_sq_thread_drop_mm(); + io_sq_thread_drop_mm_files(); } - if (ret & SQT_SPIN) { + if (sqt_spin || !time_after(jiffies, timeout)) { io_run_task_work(); cond_resched(); - } else if (ret == SQT_IDLE) { - if (kthread_should_park()) - continue; + if (sqt_spin) + timeout = jiffies + sqd->sq_thread_idle; + continue; + } + + if (kthread_should_park()) + continue; + + needs_sched = true; + prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + if ((ctx->flags & IORING_SETUP_IOPOLL) && + !list_empty_careful(&ctx->iopoll_list)) { + needs_sched = false; + break; + } + if (io_sqring_entries(ctx)) { + needs_sched = false; + break; + } + } + + if (needs_sched) { list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_set_wakeup_flag(ctx); + schedule(); - start_jiffies = jiffies; list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); } + + finish_wait(&sqd->wait, &wait); + timeout = jiffies + sqd->sq_thread_idle; } io_run_task_work(); @@ -6818,6 +7071,11 @@ static int io_sq_thread(void *data) if (old_cred) revert_creds(old_cred); + task_lock(current); + current->files = old_files; + current->nsproxy = old_nsproxy; + task_unlock(current); + kthread_parkme(); return 0; @@ -6862,13 +7120,8 @@ static int io_run_task_work_sig(void) return 1; if (!signal_pending(current)) return 0; - if (current->jobctl & JOBCTL_TASK_WORK) { - spin_lock_irq(¤t->sighand->siglock); - current->jobctl &= ~JOBCTL_TASK_WORK; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - return 1; - } + if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL)) + return -ERESTARTSYS; return -EINTR; } @@ -6877,7 +7130,8 @@ static int io_run_task_work_sig(void) * application must reap them itself, as they reside on the shared cq ring. */ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, - const sigset_t __user *sig, size_t sigsz) + const sigset_t __user *sig, size_t sigsz, + struct __kernel_timespec __user *uts) { struct io_wait_queue iowq = { .wq = { @@ -6889,6 +7143,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, .to_wait = min_events, }; struct io_rings *rings = ctx->rings; + struct timespec64 ts; + signed long timeout = 0; int ret = 0; do { @@ -6911,6 +7167,12 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return ret; } + if (uts) { + if (get_timespec64(&ts, uts)) + return -EFAULT; + timeout = timespec64_to_jiffies(&ts); + } + iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); trace_io_uring_cqring_wait(ctx, min_events); do { @@ -6924,7 +7186,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, break; if (io_should_wake(&iowq, false)) break; - schedule(); + if (uts) { + timeout = schedule_timeout(timeout); + if (timeout == 0) { + ret = -ETIME; + break; + } + } else { + schedule(); + } } while (1); finish_wait(&ctx->wait, &iowq.wq); @@ -6973,9 +7243,9 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) if (!data) return -ENXIO; - spin_lock(&data->lock); + spin_lock_bh(&data->lock); ref_node = data->node; - spin_unlock(&data->lock); + spin_unlock_bh(&data->lock); if (ref_node) percpu_ref_kill(&ref_node->refs); @@ -7098,12 +7368,11 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx) mutex_lock(&sqd->ctx_lock); list_del(&ctx->sqd_list); + io_sqd_update_thread_idle(sqd); mutex_unlock(&sqd->ctx_lock); - if (sqd->thread) { - finish_wait(&sqd->wait, &ctx->sqo_wait_entry); + if (sqd->thread) io_sq_thread_unpark(sqd); - } io_put_sq_data(sqd); ctx->sq_data = NULL; @@ -7358,7 +7627,7 @@ static void io_file_data_ref_zero(struct percpu_ref *ref) data = ref_node->file_data; ctx = data->ctx; - spin_lock(&data->lock); + spin_lock_bh(&data->lock); ref_node->done = true; while (!list_empty(&data->ref_list)) { @@ -7370,7 +7639,7 @@ static void io_file_data_ref_zero(struct percpu_ref *ref) list_del(&ref_node->node); first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist); } - spin_unlock(&data->lock); + spin_unlock_bh(&data->lock); if (percpu_ref_is_dying(&data->refs)) delay = 0; @@ -7493,9 +7762,9 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, } file_data->node = ref_node; - spin_lock(&file_data->lock); + spin_lock_bh(&file_data->lock); list_add_tail(&ref_node->node, &file_data->ref_list); - spin_unlock(&file_data->lock); + spin_unlock_bh(&file_data->lock); percpu_ref_get(&file_data->refs); return ret; out_fput: @@ -7652,10 +7921,10 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, if (needs_switch) { percpu_ref_kill(&data->node->refs); - spin_lock(&data->lock); + spin_lock_bh(&data->lock); list_add_tail(&ref_node->node, &data->ref_list); data->node = ref_node; - spin_unlock(&data->lock); + spin_unlock_bh(&data->lock); percpu_ref_get(&ctx->file_data->refs); } else destroy_fixed_file_ref_node(ref_node); @@ -7783,7 +8052,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, struct io_sq_data *sqd; ret = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) goto err; sqd = io_get_sq_data(p); @@ -8369,8 +8638,6 @@ static void io_ring_exit_work(struct work_struct *work) * as nobody else will be looking for them. */ do { - if (ctx->rings) - io_cqring_overflow_flush(ctx, true, NULL, NULL); io_iopoll_try_reap_events(ctx); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); @@ -8380,17 +8647,17 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); percpu_ref_kill(&ctx->refs); + if (ctx->rings) + io_cqring_overflow_flush(ctx, true, NULL, NULL); mutex_unlock(&ctx->uring_lock); - io_kill_timeouts(ctx, NULL); - io_poll_remove_all(ctx, NULL); + io_kill_timeouts(ctx, NULL, NULL); + io_poll_remove_all(ctx, NULL, NULL); if (ctx->io_wq) io_wq_cancel_all(ctx->io_wq); /* if we failed setting up the ctx, we might not have any rings */ - if (ctx->rings) - io_cqring_overflow_flush(ctx, true, NULL, NULL); io_iopoll_try_reap_events(ctx); idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx); @@ -8421,120 +8688,31 @@ static int io_uring_release(struct inode *inode, struct file *file) return 0; } -static bool io_wq_files_match(struct io_wq_work *work, void *data) -{ - struct files_struct *files = data; +struct io_task_cancel { + struct task_struct *task; + struct files_struct *files; +}; - return !files || ((work->flags & IO_WQ_WORK_FILES) && - work->identity->files == files); -} - -/* - * Returns true if 'preq' is the link parent of 'req' - */ -static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req) -{ - struct io_kiocb *link; - - if (!(preq->flags & REQ_F_LINK_HEAD)) - return false; - - list_for_each_entry(link, &preq->link_list, link_list) { - if (link == req) - return true; - } - - return false; -} - -/* - * We're looking to cancel 'req' because it's holding on to our files, but - * 'req' could be a link to another request. See if it is, and cancel that - * parent request if so. - */ -static bool io_poll_remove_link(struct io_ring_ctx *ctx, struct io_kiocb *req) -{ - struct hlist_node *tmp; - struct io_kiocb *preq; - bool found = false; - int i; - - spin_lock_irq(&ctx->completion_lock); - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct hlist_head *list; - - list = &ctx->cancel_hash[i]; - hlist_for_each_entry_safe(preq, tmp, list, hash_node) { - found = io_match_link(preq, req); - if (found) { - io_poll_remove_one(preq); - break; - } - } - } - spin_unlock_irq(&ctx->completion_lock); - return found; -} - -static bool io_timeout_remove_link(struct io_ring_ctx *ctx, - struct io_kiocb *req) -{ - struct io_kiocb *preq; - bool found = false; - - spin_lock_irq(&ctx->completion_lock); - list_for_each_entry(preq, &ctx->timeout_list, timeout.list) { - found = io_match_link(preq, req); - if (found) { - __io_timeout_cancel(preq); - break; - } - } - spin_unlock_irq(&ctx->completion_lock); - return found; -} - -static bool io_cancel_link_cb(struct io_wq_work *work, void *data) +static bool io_cancel_task_cb(struct io_wq_work *work, void *data) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct io_task_cancel *cancel = data; bool ret; - if (req->flags & REQ_F_LINK_TIMEOUT) { + if (cancel->files && (req->flags & REQ_F_LINK_TIMEOUT)) { unsigned long flags; struct io_ring_ctx *ctx = req->ctx; /* protect against races with linked timeouts */ spin_lock_irqsave(&ctx->completion_lock, flags); - ret = io_match_link(req, data); + ret = io_match_task(req, cancel->task, cancel->files); spin_unlock_irqrestore(&ctx->completion_lock, flags); } else { - ret = io_match_link(req, data); + ret = io_match_task(req, cancel->task, cancel->files); } return ret; } -static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) -{ - enum io_wq_cancel cret; - - /* cancel this particular work, if it's running */ - cret = io_wq_cancel_work(ctx->io_wq, &req->work); - if (cret != IO_WQ_CANCEL_NOTFOUND) - return; - - /* find links that hold this pending, cancel those */ - cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true); - if (cret != IO_WQ_CANCEL_NOTFOUND) - return; - - /* if we have a poll link holding this pending, cancel that */ - if (io_poll_remove_link(ctx, req)) - return; - - /* final option, timeout link is holding this req pending */ - io_timeout_remove_link(ctx, req); -} - static void io_cancel_defer_files(struct io_ring_ctx *ctx, struct task_struct *task, struct files_struct *files) @@ -8544,8 +8722,7 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, spin_lock_irq(&ctx->completion_lock); list_for_each_entry_reverse(de, &ctx->defer_list, list) { - if (io_task_match(de->req, task) && - io_match_files(de->req, files)) { + if (io_match_task(de->req, task, files)) { list_cut_position(&list, &ctx->defer_list, &de->list); break; } @@ -8562,72 +8739,52 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, } } -/* - * Returns true if we found and killed one or more files pinning requests - */ -static bool io_uring_cancel_files(struct io_ring_ctx *ctx, +static void io_uring_cancel_files(struct io_ring_ctx *ctx, + struct task_struct *task, struct files_struct *files) { - if (list_empty_careful(&ctx->inflight_list)) - return false; - - /* cancel all at once, should be faster than doing it one by one*/ - io_wq_cancel_cb(ctx->io_wq, io_wq_files_match, files, true); - while (!list_empty_careful(&ctx->inflight_list)) { - struct io_kiocb *cancel_req = NULL, *req; + struct io_task_cancel cancel = { .task = task, .files = files }; + struct io_kiocb *req; DEFINE_WAIT(wait); + bool found = false; spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { - if (files && (req->work.flags & IO_WQ_WORK_FILES) && + if (req->task != task || req->work.identity->files != files) continue; - /* req is being completed, ignore */ - if (!refcount_inc_not_zero(&req->refs)) - continue; - cancel_req = req; + found = true; break; } - if (cancel_req) - prepare_to_wait(&ctx->inflight_wait, &wait, - TASK_UNINTERRUPTIBLE); + if (found) + prepare_to_wait(&task->io_uring->wait, &wait, + TASK_UNINTERRUPTIBLE); spin_unlock_irq(&ctx->inflight_lock); /* We need to keep going until we don't find a matching req */ - if (!cancel_req) + if (!found) break; - /* cancel this request, or head link requests */ - io_attempt_cancel(ctx, cancel_req); - io_put_req(cancel_req); + + io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); + io_poll_remove_all(ctx, task, files); + io_kill_timeouts(ctx, task, files); /* cancellations _may_ trigger task work */ io_run_task_work(); schedule(); - finish_wait(&ctx->inflight_wait, &wait); + finish_wait(&task->io_uring->wait, &wait); } - - return true; } -static bool io_cancel_task_cb(struct io_wq_work *work, void *data) +static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, + struct task_struct *task) { - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - struct task_struct *task = data; - - return io_task_match(req, task); -} - -static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, - struct task_struct *task, - struct files_struct *files) -{ - bool ret; - - ret = io_uring_cancel_files(ctx, files); - if (!files) { + while (1) { + struct io_task_cancel cancel = { .task = task, .files = NULL, }; enum io_wq_cancel cret; + bool ret = false; - cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, task, true); + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); if (cret != IO_WQ_CANCEL_NOTFOUND) ret = true; @@ -8639,11 +8796,13 @@ static bool __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, } } - ret |= io_poll_remove_all(ctx, task); - ret |= io_kill_timeouts(ctx, task); + ret |= io_poll_remove_all(ctx, task, NULL); + ret |= io_kill_timeouts(ctx, task, NULL); + if (!ret) + break; + io_run_task_work(); + cond_resched(); } - - return ret; } /* @@ -8662,17 +8821,15 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, io_sq_thread_park(ctx->sq_data); } - if (files) - io_cancel_defer_files(ctx, NULL, files); - else - io_cancel_defer_files(ctx, task, NULL); - + io_cancel_defer_files(ctx, task, files); + io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); io_cqring_overflow_flush(ctx, true, task, files); + io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); - while (__io_uring_cancel_task_requests(ctx, task, files)) { - io_run_task_work(); - cond_resched(); - } + if (!files) + __io_uring_cancel_task_requests(ctx, task); + else + io_uring_cancel_files(ctx, task, files); if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { atomic_dec(&task->io_uring->in_idle); @@ -8930,9 +9087,39 @@ static void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) finish_wait(&ctx->sqo_sq_wait, &wait); } +static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, + struct __kernel_timespec __user **ts, + const sigset_t __user **sig) +{ + struct io_uring_getevents_arg arg; + + /* + * If EXT_ARG isn't set, then we have no timespec and the argp pointer + * is just a pointer to the sigset_t. + */ + if (!(flags & IORING_ENTER_EXT_ARG)) { + *sig = (const sigset_t __user *) argp; + *ts = NULL; + return 0; + } + + /* + * EXT_ARG is set - ensure we agree on the size of it and copy in our + * timespec and sigset_t pointers if good. + */ + if (*argsz != sizeof(arg)) + return -EINVAL; + if (copy_from_user(&arg, argp, sizeof(arg))) + return -EFAULT; + *sig = u64_to_user_ptr(arg.sigmask); + *argsz = arg.sigmask_sz; + *ts = u64_to_user_ptr(arg.ts); + return 0; +} + SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, - u32, min_complete, u32, flags, const sigset_t __user *, sig, - size_t, sigsz) + u32, min_complete, u32, flags, const void __user *, argp, + size_t, argsz) { struct io_ring_ctx *ctx; long ret = -EBADF; @@ -8942,7 +9129,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, io_run_task_work(); if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | - IORING_ENTER_SQ_WAIT)) + IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG)) return -EINVAL; f = fdget(fd); @@ -8969,8 +9156,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, */ ret = 0; if (ctx->flags & IORING_SETUP_SQPOLL) { + io_ring_submit_lock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); if (!list_empty_careful(&ctx->cq_overflow_list)) io_cqring_overflow_flush(ctx, false, NULL, NULL); + io_ring_submit_unlock(ctx, (ctx->flags & IORING_SETUP_IOPOLL)); if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) @@ -8988,6 +9177,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, goto out; } if (flags & IORING_ENTER_GETEVENTS) { + const sigset_t __user *sig; + struct __kernel_timespec __user *ts; + + ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); + if (unlikely(ret)) + goto out; + min_complete = min(min_complete, ctx->cq_entries); /* @@ -9000,7 +9196,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, !(ctx->flags & IORING_SETUP_SQPOLL)) { ret = io_iopoll_check(ctx, min_complete); } else { - ret = io_cqring_wait(ctx, min_complete, sig, sigsz); + ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts); } } @@ -9368,7 +9564,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | - IORING_FEAT_POLL_32BITS; + IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | + IORING_FEAT_EXT_ARG; if (copy_to_user(params, p, sizeof(*p))) { ret = -EFAULT; diff --git a/fs/namei.c b/fs/namei.c index 71e4d8981c84..d1c1d23c44c9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4430,8 +4430,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, } EXPORT_SYMBOL(vfs_rename); -static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, - const char __user *newname, unsigned int flags) +int do_renameat2(int olddfd, struct filename *from, int newdfd, + struct filename *to, unsigned int flags) { struct dentry *old_dentry, *new_dentry; struct dentry *trap; @@ -4439,32 +4439,30 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; - struct filename *from; - struct filename *to; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; bool should_retry = false; - int error; + int error = -EINVAL; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) - return -EINVAL; + goto put_both; if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && (flags & RENAME_EXCHANGE)) - return -EINVAL; + goto put_both; if (flags & RENAME_EXCHANGE) target_flags = 0; retry: - from = filename_parentat(olddfd, getname(oldname), lookup_flags, - &old_path, &old_last, &old_type); + from = filename_parentat(olddfd, from, lookup_flags, &old_path, + &old_last, &old_type); if (IS_ERR(from)) { error = PTR_ERR(from); - goto exit; + goto put_new; } - to = filename_parentat(newdfd, getname(newname), lookup_flags, - &new_path, &new_last, &new_type); + to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last, + &new_type); if (IS_ERR(to)) { error = PTR_ERR(to); goto exit1; @@ -4557,34 +4555,40 @@ static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, if (retry_estale(error, lookup_flags)) should_retry = true; path_put(&new_path); - putname(to); exit1: path_put(&old_path); - putname(from); if (should_retry) { should_retry = false; lookup_flags |= LOOKUP_REVAL; goto retry; } -exit: +put_both: + if (!IS_ERR(from)) + putname(from); +put_new: + if (!IS_ERR(to)) + putname(to); return error; } SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags) { - return do_renameat2(olddfd, oldname, newdfd, newname, flags); + return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), + flags); } SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname) { - return do_renameat2(olddfd, oldname, newdfd, newname, 0); + return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), + 0); } SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { - return do_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); + return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD, + getname(newname), 0); } int readlink_copy(char __user *buffer, int buflen, const char *link) diff --git a/fs/pipe.c b/fs/pipe.c index 0ac197658a2d..c5989cfd564d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1342,9 +1342,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) } /* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. + * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is + * not enough to verify that this is a pipe. */ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) { diff --git a/fs/proc/base.c b/fs/proc/base.c index 8aebdc1c0cdc..bbfd3fa33b63 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3267,6 +3267,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif +#ifdef CONFIG_SECCOMP_CACHE_DEBUG + ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3596,6 +3599,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif +#ifdef CONFIG_SECCOMP_CACHE_DEBUG + ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), +#endif #ifdef CONFIG_CPU_FREQ_TIMES ONE("time_in_state", 0444, proc_time_in_state_show), #endif diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index e16a49ebfe54..8adabde685f1 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -14,6 +14,14 @@ config PSTORE If you don't have a platform persistent store driver, say N. +config PSTORE_DEFAULT_KMSG_BYTES + int "Default kernel log storage space" if EXPERT + depends on PSTORE + default "10240" + help + Defines default size of pstore kernel log storage. + Can be enlarged if needed, not recommended to shrink it. + config PSTORE_DEFLATE_COMPRESS tristate "DEFLATE (ZLIB) compression" default y diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c index fcd5563dde06..4bb8a344957a 100644 --- a/fs/pstore/blk.c +++ b/fs/pstore/blk.c @@ -90,7 +90,6 @@ MODULE_PARM_DESC(blkdev, "block device for pstore storage"); static DEFINE_MUTEX(pstore_blk_lock); static struct block_device *psblk_bdev; static struct pstore_zone_info *pstore_zone_info; -static pstore_blk_panic_write_op blkdev_panic_write; struct bdev_info { dev_t devt; @@ -245,7 +244,7 @@ static struct block_device *psblk_get_bdev(void *holder, return bdev; } - nr_sects = part_nr_sects_read(bdev->bd_part); + nr_sects = bdev_nr_sectors(bdev); if (!nr_sects) { pr_err("not enough space for '%s'\n", blkdev); blkdev_put(bdev, mode); @@ -341,24 +340,11 @@ static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes, return ret; } -static ssize_t psblk_blk_panic_write(const char *buf, size_t size, - loff_t off) -{ - int ret; - - if (!blkdev_panic_write) - return -EOPNOTSUPP; - - /* size and off must align to SECTOR_SIZE for block device */ - ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT, - size >> SECTOR_SHIFT); - /* try next zone */ - if (ret == -ENOMSG) - return ret; - return ret ? -EIO : size; -} - -static int __register_pstore_blk(struct pstore_blk_info *info) +/* + * This takes its configuration only from the module parameters now. + * See psblk_get_bdev() and blkdev. + */ +static int __register_pstore_blk(void) { char bdev_name[BDEVNAME_SIZE]; struct block_device *bdev; @@ -378,68 +364,34 @@ static int __register_pstore_blk(struct pstore_blk_info *info) } /* only allow driver matching the @blkdev */ - if (!binfo.devt || (!best_effort && - MAJOR(binfo.devt) != info->major)) { - pr_debug("invalid major %u (expect %u)\n", - info->major, MAJOR(binfo.devt)); + if (!binfo.devt) { + pr_debug("no major\n"); ret = -ENODEV; goto err_put_bdev; } /* psblk_bdev must be assigned before register to pstore/blk */ psblk_bdev = bdev; - blkdev_panic_write = info->panic_write; - - /* Copy back block device details. */ - info->devt = binfo.devt; - info->nr_sects = binfo.nr_sects; - info->start_sect = binfo.start_sect; memset(&dev, 0, sizeof(dev)); - dev.total_size = info->nr_sects << SECTOR_SHIFT; - dev.flags = info->flags; + dev.total_size = binfo.nr_sects << SECTOR_SHIFT; dev.read = psblk_generic_blk_read; dev.write = psblk_generic_blk_write; - dev.erase = NULL; - dev.panic_write = info->panic_write ? psblk_blk_panic_write : NULL; ret = __register_pstore_device(&dev); if (ret) goto err_put_bdev; bdevname(bdev, bdev_name); - pr_info("attached %s%s\n", bdev_name, - info->panic_write ? "" : " (no dedicated panic_write!)"); + pr_info("attached %s (no dedicated panic_write!)\n", bdev_name); return 0; err_put_bdev: psblk_bdev = NULL; - blkdev_panic_write = NULL; psblk_put_bdev(bdev, holder); return ret; } -/** - * register_pstore_blk() - register block device to pstore/blk - * - * @info: details on the desired block device interface - * - * Return: - * * 0 - OK - * * Others - something error. - */ -int register_pstore_blk(struct pstore_blk_info *info) -{ - int ret; - - mutex_lock(&pstore_blk_lock); - ret = __register_pstore_blk(info); - mutex_unlock(&pstore_blk_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(register_pstore_blk); - static void __unregister_pstore_blk(unsigned int major) { struct pstore_device_info dev = { .read = psblk_generic_blk_read }; @@ -449,24 +401,10 @@ static void __unregister_pstore_blk(unsigned int major) if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) { __unregister_pstore_device(&dev); psblk_put_bdev(psblk_bdev, holder); - blkdev_panic_write = NULL; psblk_bdev = NULL; } } -/** - * unregister_pstore_blk() - unregister block device from pstore/blk - * - * @major: the major device number of device - */ -void unregister_pstore_blk(unsigned int major) -{ - mutex_lock(&pstore_blk_lock); - __unregister_pstore_blk(major); - mutex_unlock(&pstore_blk_lock); -} -EXPORT_SYMBOL_GPL(unregister_pstore_blk); - /* get information of pstore/blk */ int pstore_blk_get_config(struct pstore_blk_config *info) { @@ -483,12 +421,11 @@ EXPORT_SYMBOL_GPL(pstore_blk_get_config); static int __init pstore_blk_init(void) { - struct pstore_blk_info info = { }; int ret = 0; mutex_lock(&pstore_blk_lock); if (!pstore_zone_info && best_effort && blkdev[0]) - ret = __register_pstore_blk(&info); + ret = __register_pstore_blk(); mutex_unlock(&pstore_blk_lock); return ret; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index c331efe8de95..93a217e4f563 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -266,7 +266,7 @@ static void parse_options(char *options) */ static int pstore_show_options(struct seq_file *m, struct dentry *root) { - if (kmsg_bytes != PSTORE_DEFAULT_KMSG_BYTES) + if (kmsg_bytes != CONFIG_PSTORE_DEFAULT_KMSG_BYTES) seq_printf(m, ",kmsg_bytes=%lu", kmsg_bytes); return 0; } diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 7fb219042f13..801d6c0b170c 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -6,7 +6,6 @@ #include #include -#define PSTORE_DEFAULT_KMSG_BYTES 10240 extern unsigned long kmsg_bytes; #ifdef CONFIG_PSTORE_FTRACE diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 36714df37d5d..32f64abc277c 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -101,7 +101,7 @@ static char *big_oops_buf; static size_t big_oops_buf_sz; /* How much of the console log to snapshot */ -unsigned long kmsg_bytes = PSTORE_DEFAULT_KMSG_BYTES; +unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES; void pstore_set_kmsg_bytes(int bytes) { diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c index 3ce89216670c..5266ccbec007 100644 --- a/fs/pstore/zone.c +++ b/fs/pstore/zone.c @@ -1299,6 +1299,10 @@ int register_pstore_zone(struct pstore_zone_info *info) pr_warn("total_size must be >= 4096\n"); return -EINVAL; } + if (info->total_size > SZ_128M) { + pr_warn("capping size to 128MiB\n"); + info->total_size = SZ_128M; + } if (!info->kmsg_size && !info->pmsg_size && !info->console_size && !info->ftrace_size) { diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 9af95c7a0bbe..6d16b2be5ac4 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -20,6 +20,7 @@ #include #include #include "compat.h" +#include "../internal.h" static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) @@ -865,27 +866,42 @@ static bool quotactl_cmd_onoff(int cmd) static struct super_block *quotactl_block(const char __user *special, int cmd) { #ifdef CONFIG_BLOCK - struct block_device *bdev; struct super_block *sb; struct filename *tmp = getname(special); + bool excl = false, thawed = false; + int error; + dev_t dev; if (IS_ERR(tmp)) return ERR_CAST(tmp); - bdev = lookup_bdev(tmp->name); + error = lookup_bdev(tmp->name, &dev); putname(tmp); - if (IS_ERR(bdev)) - return ERR_CAST(bdev); - if (quotactl_cmd_onoff(cmd)) - sb = get_super_exclusive_thawed(bdev); - else if (quotactl_cmd_write(cmd)) - sb = get_super_thawed(bdev); - else - sb = get_super(bdev); - bdput(bdev); + if (error) + return ERR_PTR(error); + + if (quotactl_cmd_onoff(cmd)) { + excl = true; + thawed = true; + } else if (quotactl_cmd_write(cmd)) { + thawed = true; + } + +retry: + sb = user_get_super(dev, excl); if (!sb) return ERR_PTR(-ENODEV); - + if (thawed && sb->s_writers.frozen != SB_UNFROZEN) { + if (excl) + up_write(&sb->s_umount); + else + up_read(&sb->s_umount); + wait_event(sb->s_writers.wait_unfrozen, + sb->s_writers.frozen == SB_UNFROZEN); + put_super(sb); + goto retry; + } return sb; + #else return ERR_PTR(-ENODEV); #endif diff --git a/fs/statfs.c b/fs/statfs.c index 59f33752c131..68cb07788750 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -235,7 +235,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user static int vfs_ustat(dev_t dev, struct kstatfs *sbuf) { - struct super_block *s = user_get_super(dev); + struct super_block *s = user_get_super(dev, false); int err; if (!s) return -EINVAL; diff --git a/fs/super.c b/fs/super.c index 98bb0629ee10..2c6cdea2ab2d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -307,7 +307,7 @@ static void __put_super(struct super_block *s) * Drops a temporary reference, frees superblock if there's no * references left. */ -static void put_super(struct super_block *sb) +void put_super(struct super_block *sb) { spin_lock(&sb_lock); __put_super(sb); @@ -740,7 +740,14 @@ void iterate_supers_type(struct file_system_type *type, EXPORT_SYMBOL(iterate_supers_type); -static struct super_block *__get_super(struct block_device *bdev, bool excl) +/** + * get_super - get the superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device given. %NULL is returned if no match is found. + */ +struct super_block *get_super(struct block_device *bdev) { struct super_block *sb; @@ -755,17 +762,11 @@ static struct super_block *__get_super(struct block_device *bdev, bool excl) if (sb->s_bdev == bdev) { sb->s_count++; spin_unlock(&sb_lock); - if (!excl) - down_read(&sb->s_umount); - else - down_write(&sb->s_umount); + down_read(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; - if (!excl) - up_read(&sb->s_umount); - else - up_write(&sb->s_umount); + up_read(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); @@ -776,66 +777,6 @@ static struct super_block *__get_super(struct block_device *bdev, bool excl) return NULL; } -/** - * get_super - get the superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device given. %NULL is returned if no match is found. - */ -struct super_block *get_super(struct block_device *bdev) -{ - return __get_super(bdev, false); -} -EXPORT_SYMBOL(get_super); - -static struct super_block *__get_super_thawed(struct block_device *bdev, - bool excl) -{ - while (1) { - struct super_block *s = __get_super(bdev, excl); - if (!s || s->s_writers.frozen == SB_UNFROZEN) - return s; - if (!excl) - up_read(&s->s_umount); - else - up_write(&s->s_umount); - wait_event(s->s_writers.wait_unfrozen, - s->s_writers.frozen == SB_UNFROZEN); - put_super(s); - } -} - -/** - * get_super_thawed - get thawed superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen). %NULL is returned if no match - * is found. - */ -struct super_block *get_super_thawed(struct block_device *bdev) -{ - return __get_super_thawed(bdev, false); -} -EXPORT_SYMBOL(get_super_thawed); - -/** - * get_super_exclusive_thawed - get thawed superblock of a device - * @bdev: device to get the superblock for - * - * Scans the superblock list and finds the superblock of the file system - * mounted on the device. The superblock is returned once it is thawed - * (or immediately if it was not frozen) and s_umount semaphore is held - * in exclusive mode. %NULL is returned if no match is found. - */ -struct super_block *get_super_exclusive_thawed(struct block_device *bdev) -{ - return __get_super_thawed(bdev, true); -} -EXPORT_SYMBOL(get_super_exclusive_thawed); - /** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for @@ -867,7 +808,7 @@ struct super_block *get_active_super(struct block_device *bdev) return NULL; } -struct super_block *user_get_super(dev_t dev) +struct super_block *user_get_super(dev_t dev, bool excl) { struct super_block *sb; @@ -879,11 +820,17 @@ struct super_block *user_get_super(dev_t dev) if (sb->s_dev == dev) { sb->s_count++; spin_unlock(&sb_lock); - down_read(&sb->s_umount); + if (excl) + down_write(&sb->s_umount); + else + down_read(&sb->s_umount); /* still alive? */ if (sb->s_root && (sb->s_flags & SB_BORN)) return sb; - up_read(&sb->s_umount); + if (excl) + up_write(&sb->s_umount); + else + up_read(&sb->s_umount); /* nope, got unmounted */ spin_lock(&sb_lock); __put_super(sb); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index ef1d5bb88b93..b7c5783a031c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -433,13 +433,10 @@ xfs_fs_goingdown( { switch (inflags) { case XFS_FSOP_GOING_FLAGS_DEFAULT: { - struct super_block *sb = freeze_bdev(mp->m_super->s_bdev); - - if (sb && !IS_ERR(sb)) { + if (!freeze_bdev(mp->m_super->s_bdev)) { xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); - thaw_bdev(sb->s_bdev, sb); + thaw_bdev(mp->m_super->s_bdev); } - break; } case XFS_FSOP_GOING_FLAGS_LOGFLUSH: diff --git a/include/dt-bindings/reset/bcm6318-reset.h b/include/dt-bindings/reset/bcm6318-reset.h new file mode 100644 index 000000000000..f882662505ea --- /dev/null +++ b/include/dt-bindings/reset/bcm6318-reset.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __DT_BINDINGS_RESET_BCM6318_H +#define __DT_BINDINGS_RESET_BCM6318_H + +#define BCM6318_RST_SPI 0 +#define BCM6318_RST_EPHY 1 +#define BCM6318_RST_SAR 2 +#define BCM6318_RST_ENETSW 3 +#define BCM6318_RST_USBD 4 +#define BCM6318_RST_USBH 5 +#define BCM6318_RST_PCIE_CORE 6 +#define BCM6318_RST_PCIE 7 +#define BCM6318_RST_PCIE_EXT 8 +#define BCM6318_RST_PCIE_HARD 9 +#define BCM6318_RST_ADSL 10 +#define BCM6318_RST_PHYMIPS 11 +#define BCM6318_RST_HOSTMIPS 12 + +#endif /* __DT_BINDINGS_RESET_BCM6318_H */ diff --git a/include/dt-bindings/reset/bcm63268-reset.h b/include/dt-bindings/reset/bcm63268-reset.h new file mode 100644 index 000000000000..6a6403a4c2d5 --- /dev/null +++ b/include/dt-bindings/reset/bcm63268-reset.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __DT_BINDINGS_RESET_BCM63268_H +#define __DT_BINDINGS_RESET_BCM63268_H + +#define BCM63268_RST_SPI 0 +#define BCM63268_RST_IPSEC 1 +#define BCM63268_RST_EPHY 2 +#define BCM63268_RST_SAR 3 +#define BCM63268_RST_ENETSW 4 +#define BCM63268_RST_USBS 5 +#define BCM63268_RST_USBH 6 +#define BCM63268_RST_PCM 7 +#define BCM63268_RST_PCIE_CORE 8 +#define BCM63268_RST_PCIE 9 +#define BCM63268_RST_PCIE_EXT 10 +#define BCM63268_RST_WLAN_SHIM 11 +#define BCM63268_RST_DDR_PHY 12 +#define BCM63268_RST_FAP0 13 +#define BCM63268_RST_WLAN_UBUS 14 +#define BCM63268_RST_DECT 15 +#define BCM63268_RST_FAP1 16 +#define BCM63268_RST_PCIE_HARD 17 +#define BCM63268_RST_GPHY 18 + +#endif /* __DT_BINDINGS_RESET_BCM63268_H */ diff --git a/include/dt-bindings/reset/bcm6328-reset.h b/include/dt-bindings/reset/bcm6328-reset.h new file mode 100644 index 000000000000..0f3df87d47af --- /dev/null +++ b/include/dt-bindings/reset/bcm6328-reset.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __DT_BINDINGS_RESET_BCM6328_H +#define __DT_BINDINGS_RESET_BCM6328_H + +#define BCM6328_RST_SPI 0 +#define BCM6328_RST_EPHY 1 +#define BCM6328_RST_SAR 2 +#define BCM6328_RST_ENETSW 3 +#define BCM6328_RST_USBS 4 +#define BCM6328_RST_USBH 5 +#define BCM6328_RST_PCM 6 +#define BCM6328_RST_PCIE_CORE 7 +#define BCM6328_RST_PCIE 8 +#define BCM6328_RST_PCIE_EXT 9 +#define BCM6328_RST_PCIE_HARD 10 + +#endif /* __DT_BINDINGS_RESET_BCM6328_H */ diff --git a/include/dt-bindings/reset/bcm6358-reset.h b/include/dt-bindings/reset/bcm6358-reset.h new file mode 100644 index 000000000000..bda62ef84f5a --- /dev/null +++ b/include/dt-bindings/reset/bcm6358-reset.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __DT_BINDINGS_RESET_BCM6358_H +#define __DT_BINDINGS_RESET_BCM6358_H + +#define BCM6358_RST_SPI 0 +#define BCM6358_RST_ENET 2 +#define BCM6358_RST_MPI 3 +#define BCM6358_RST_EPHY 6 +#define BCM6358_RST_SAR 7 +#define BCM6358_RST_USBH 12 +#define BCM6358_RST_PCM 13 +#define BCM6358_RST_ADSL 14 + +#endif /* __DT_BINDINGS_RESET_BCM6358_H */ diff --git a/include/dt-bindings/reset/bcm6362-reset.h b/include/dt-bindings/reset/bcm6362-reset.h new file mode 100644 index 000000000000..7ebb0546e0ab --- /dev/null +++ b/include/dt-bindings/reset/bcm6362-reset.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __DT_BINDINGS_RESET_BCM6362_H +#define __DT_BINDINGS_RESET_BCM6362_H + +#define BCM6362_RST_SPI 0 +#define BCM6362_RST_IPSEC 1 +#define BCM6362_RST_EPHY 2 +#define BCM6362_RST_SAR 3 +#define BCM6362_RST_ENETSW 4 +#define BCM6362_RST_USBD 5 +#define BCM6362_RST_USBH 6 +#define BCM6362_RST_PCM 7 +#define BCM6362_RST_PCIE_CORE 8 +#define BCM6362_RST_PCIE 9 +#define BCM6362_RST_PCIE_EXT 10 +#define BCM6362_RST_WLAN_SHIM 11 +#define BCM6362_RST_DDR_PHY 12 +#define BCM6362_RST_FAP 13 +#define BCM6362_RST_WLAN_UBUS 14 + +#endif /* __DT_BINDINGS_RESET_BCM6362_H */ diff --git a/include/dt-bindings/reset/bcm6368-reset.h b/include/dt-bindings/reset/bcm6368-reset.h new file mode 100644 index 000000000000..c81d8eb6d173 --- /dev/null +++ b/include/dt-bindings/reset/bcm6368-reset.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __DT_BINDINGS_RESET_BCM6368_H +#define __DT_BINDINGS_RESET_BCM6368_H + +#define BCM6368_RST_SPI 0 +#define BCM6368_RST_MPI 3 +#define BCM6368_RST_IPSEC 4 +#define BCM6368_RST_EPHY 6 +#define BCM6368_RST_SAR 7 +#define BCM6368_RST_SWITCH 10 +#define BCM6368_RST_USBD 11 +#define BCM6368_RST_USBH 12 +#define BCM6368_RST_PCM 13 + +#endif /* __DT_BINDINGS_RESET_BCM6368_H */ diff --git a/include/linux/audit.h b/include/linux/audit.h index b3d859831a31..82b7c1116a85 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -292,7 +292,6 @@ extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1, extern void __audit_syscall_exit(int ret_success, long ret_value); extern struct filename *__audit_reusename(const __user char *uptr); extern void __audit_getname(struct filename *name); -extern void __audit_getcwd(void); extern void __audit_inode(struct filename *name, const struct dentry *dentry, unsigned int flags); extern void __audit_file(const struct file *); @@ -351,11 +350,6 @@ static inline void audit_getname(struct filename *name) if (unlikely(!audit_dummy_context())) __audit_getname(name); } -static inline void audit_getcwd(void) -{ - if (unlikely(audit_context())) - __audit_getcwd(); -} static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) { @@ -584,8 +578,6 @@ static inline struct filename *audit_reusename(const __user char *name) } static inline void audit_getname(struct filename *name) { } -static inline void audit_getcwd(void) -{ } static inline void audit_inode(struct filename *name, const struct dentry *dentry, unsigned int aflags) diff --git a/include/linux/bio.h b/include/linux/bio.h index c6d765382926..1edda614f7ce 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -148,11 +148,24 @@ static inline void bio_advance_iter(const struct bio *bio, /* TODO: It is reasonable to complete bio with error here. */ } +/* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ +static inline void bio_advance_iter_single(const struct bio *bio, + struct bvec_iter *iter, + unsigned int bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio_no_advance_iter(bio)) + iter->bi_size -= bytes; + else + bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); +} + #define __bio_for_each_segment(bvl, bio, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bio_iter_iovec((bio), (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) #define bio_for_each_segment(bvl, bio, iter) \ __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) @@ -161,7 +174,7 @@ static inline void bio_advance_iter(const struct bio *bio, for (iter = (start); \ (iter).bi_size && \ ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ - bio_advance_iter((bio), &(iter), (bvl).bv_len)) + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) /* iterate over multi-page bvec */ #define bio_for_each_bvec(bvl, bio, iter) \ @@ -711,12 +724,6 @@ static inline bool bioset_initialized(struct bio_set *bs) return bs->bio_slab != NULL; } -/* - * a small number of entries is fine, not going to be performance critical. - * basically we just need to survive - */ -#define BIO_SPLIT_ENTRIES 2 - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_for_each_vec(bvl, bip, iter) \ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c8fc9792ac77..b9f3c246c3c9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -197,12 +197,12 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); struct blkg_conf_ctx { - struct gendisk *disk; + struct block_device *bdev; struct blkcg_gq *blkg; char *body; }; -struct gendisk *blkcg_conf_get_disk(char **inputp); +struct block_device *blkcg_conf_open_bdev(char **inputp); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 794b2a33a2c3..47b021952ac7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -5,6 +5,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -593,6 +594,20 @@ static inline void blk_mq_cleanup_rq(struct request *rq) rq->q->mq_ops->cleanup_rq(rq); } +static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, + unsigned int nr_segs) +{ + rq->nr_phys_segments = nr_segs; + rq->__data_len = bio->bi_iter.bi_size; + rq->bio = rq->biotail = bio; + rq->ioprio = bio_prio(bio); + + if (bio->bi_disk) + rq->rq_disk = bio->bi_disk; +} + blk_qc_t blk_mq_submit_bio(struct bio *bio); +void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, + struct lock_class_key *key); #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8bac50d6e8c5..b9bd88612041 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -8,6 +8,7 @@ #include #include +#include #include struct bio_set; @@ -20,21 +21,25 @@ typedef void (bio_end_io_t) (struct bio *); struct bio_crypt_ctx; struct block_device { + sector_t bd_start_sect; + struct disk_stats __percpu *bd_stats; + unsigned long bd_stamp; + bool bd_read_only; /* read-only policy */ dev_t bd_dev; int bd_openers; struct inode * bd_inode; /* will die */ struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ void * bd_claiming; + struct device bd_device; void * bd_holder; int bd_holders; bool bd_write_holder; #ifdef CONFIG_SYSFS struct list_head bd_holder_disks; #endif - struct block_device * bd_contains; + struct kobject *bd_holder_dir; u8 bd_partno; - struct hd_struct * bd_part; /* number of times partitions within this device have been opened. */ unsigned bd_part_count; @@ -46,8 +51,23 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; + struct super_block *bd_fsfreeze_sb; + + struct partition_meta_info *bd_meta_info; +#ifdef CONFIG_FAIL_MAKE_REQUEST + bool bd_make_it_fail; +#endif } __randomize_layout; +#define bdev_whole(_bdev) \ + ((_bdev)->bd_disk->part0) + +#define dev_to_bdev(device) \ + container_of((device), struct block_device, bd_device) + +#define bdev_kobj(_bdev) \ + (&((_bdev)->bd_device.kobj)) + /* * Block error status values. See block/blk-core:blk_errors for the details. * Alpha cannot write a byte atomically, so we need to use 32-bit value. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 033eb5f73b65..070de09425ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -191,7 +191,7 @@ struct request { }; struct gendisk *rq_disk; - struct hd_struct *part; + struct block_device *part; #ifdef CONFIG_BLK_RQ_ALLOC_TIME /* Time that the first bio started allocating this request. */ u64 alloc_time_ns; @@ -1491,7 +1491,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev) return -1; if (bdev_is_partition(bdev)) return queue_limit_alignment_offset(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.alignment_offset; } @@ -1532,7 +1532,7 @@ static inline int bdev_discard_alignment(struct block_device *bdev) if (bdev_is_partition(bdev)) return queue_limit_discard_alignment(&q->limits, - bdev->bd_part->start_sect); + bdev->bd_start_sect); return q->limits.discard_alignment; } @@ -1853,6 +1853,7 @@ struct block_device_operations { void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); int (*getgeo)(struct block_device *, struct hd_geometry *); + int (*set_read_only)(struct block_device *bdev, bool ro); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, @@ -1869,8 +1870,6 @@ extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, #define blkdev_compat_ptr_ioctl NULL #endif -extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, - unsigned long); extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); @@ -1947,9 +1946,9 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); -unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, - struct bio *bio); -void part_end_io_acct(struct hd_struct *part, struct bio *bio, +unsigned long part_start_io_acct(struct gendisk *disk, + struct block_device **part, struct bio *bio); +void part_end_io_acct(struct block_device *part, struct bio *bio, unsigned long start_time); /** @@ -1977,7 +1976,7 @@ int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); const char *bdevname(struct block_device *bdev, char *buffer); -struct block_device *lookup_bdev(const char *); +int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); @@ -1992,14 +1991,17 @@ void blkdev_show(struct seq_file *seqf, off_t offset); struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); -int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, - void *holder); -void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, - void *holder); +int bd_prepare_to_claim(struct block_device *bdev, void *holder); +void bd_abort_claiming(struct block_device *bdev, void *holder); void blkdev_put(struct block_device *bdev, fmode_t mode); +/* just for blk-cgroup, don't use elsewhere */ +struct block_device *blkdev_get_no_open(dev_t dev); +void blkdev_put_no_open(struct block_device *bdev); + +struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); +void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdget_part(struct hd_struct *part); struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); @@ -2024,7 +2026,7 @@ static inline int sync_blockdev(struct block_device *bdev) #endif int fsync_bdev(struct block_device *bdev); -struct super_block *freeze_bdev(struct block_device *bdev); -int thaw_bdev(struct block_device *bdev, struct super_block *sb); +int freeze_bdev(struct block_device *bdev); +int thaw_bdev(struct block_device *bdev); #endif /* _LINUX_BLKDEV_H */ diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 3b6ff5902edc..05556573b896 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -75,8 +75,7 @@ static inline bool blk_trace_note_message_enabled(struct request_queue *q) return ret; } -extern void blk_add_driver_data(struct request_queue *q, struct request *rq, - void *data, size_t len); +extern void blk_add_driver_data(struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg); @@ -90,7 +89,7 @@ extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) # define blk_trace_shutdown(q) do { } while (0) -# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_add_driver_data(rq, data, len) do {} while (0) # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 2efec10bf792..ff832e698efb 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -121,18 +121,28 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } -static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) +/* + * A simpler version of bvec_iter_advance(), @bytes should not span + * across multiple bvec entries, i.e. bytes <= bv[i->bi_idx].bv_len + */ +static inline void bvec_iter_advance_single(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned int bytes) { - iter->bi_bvec_done = 0; - iter->bi_idx++; + unsigned int done = iter->bi_bvec_done + bytes; + + if (done == bv[iter->bi_idx].bv_len) { + done = 0; + iter->bi_idx++; + } + iter->bi_bvec_done = done; + iter->bi_size -= bytes; } #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ - (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 7c581a4c3797..ca86a00abe86 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -21,10 +21,6 @@ # define _TIF_UPROBE (0) #endif -#ifndef _TIF_NOTIFY_SIGNAL -# define _TIF_NOTIFY_SIGNAL (0) -#endif - /* * SYSCALL_WORK flags handled in syscall_enter_from_user_mode() */ diff --git a/include/linux/fs.h b/include/linux/fs.h index ce50d67406db..ad4cf1bae586 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -696,7 +696,6 @@ struct inode { struct list_head i_devices; union { struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; @@ -1408,7 +1407,7 @@ enum { struct sb_writers { int frozen; /* Is sb frozen? */ - wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */ + wait_queue_head_t wait_unfrozen; /* wait for thaw */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -3131,8 +3130,6 @@ extern struct file_system_type *get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); -extern struct super_block *get_super_thawed(struct block_device *); -extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); extern struct super_block *get_active_super(struct block_device *bdev); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 03da3f603d30..809aaa32d53c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -19,11 +19,6 @@ #include #include -#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) -#define dev_to_part(device) container_of((device), struct hd_struct, __dev) -#define disk_to_dev(disk) (&(disk)->part0.__dev) -#define part_to_dev(part) (&((part)->__dev)) - extern const struct device_type disk_type; extern struct device_type part_type; extern struct class block_class; @@ -50,31 +45,6 @@ struct partition_meta_info { u8 volname[PARTITION_META_INFO_VOLNAMELTH]; }; -struct hd_struct { - sector_t start_sect; - /* - * nr_sects is protected by sequence counter. One might extend a - * partition while IO is happening to it and update of nr_sects - * can be non-atomic on 32bit machines with 64bit sector_t. - */ - sector_t nr_sects; -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_t nr_sects_seq; -#endif - unsigned long stamp; - struct disk_stats __percpu *dkstats; - struct percpu_ref ref; - - struct device __dev; - struct kobject *holder_dir; - int policy, partno; - struct partition_meta_info *info; -#ifdef CONFIG_FAIL_MAKE_REQUEST - int make_it_fail; -#endif - struct rcu_work rcu_work; -}; - /** * DOC: genhd capability flags * @@ -149,8 +119,8 @@ enum { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct __rcu *last_lookup; - struct hd_struct __rcu *part[]; + struct block_device __rcu *last_lookup; + struct block_device __rcu *part[]; }; struct disk_events; @@ -184,7 +154,7 @@ struct gendisk { * helpers. */ struct disk_part_tbl __rcu *part_tbl; - struct hd_struct part0; + struct block_device *part0; const struct block_device_operations *fops; struct request_queue *queue; @@ -193,7 +163,6 @@ struct gendisk { int flags; unsigned long state; #define GD_NEED_PART_SCAN 0 - struct rw_semaphore lookup_sem; struct kobject *slave_dir; struct timer_rand_state *random; @@ -210,23 +179,21 @@ struct gendisk { struct lockdep_map lockdep_map; }; +/* + * The gendisk is refcounted by the part0 block_device, and the bd_device + * therein is also used for device model presentation in sysfs. + */ +#define dev_to_disk(device) \ + (dev_to_bdev(device)->bd_disk) +#define disk_to_dev(disk) \ + (&((disk)->part0->bd_device)) + #if IS_REACHABLE(CONFIG_CDROM) #define disk_to_cdi(disk) ((disk)->cdi) #else #define disk_to_cdi(disk) NULL #endif -static inline struct gendisk *part_to_disk(struct hd_struct *part) -{ - if (likely(part)) { - if (part->partno) - return dev_to_disk(part_to_dev(part)->parent); - else - return dev_to_disk(part_to_dev(part)); - } - return NULL; -} - static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -245,27 +212,6 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } -static inline dev_t part_devt(struct hd_struct *part) -{ - return part_to_dev(part)->devt; -} - -extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); -extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); - -static inline void disk_put_part(struct hd_struct *part) -{ - if (likely(part)) - put_device(part_to_dev(part)); -} - -static inline void hd_sects_seq_init(struct hd_struct *p) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - seqcount_init(&p->nr_sects_seq); -#endif -} - /* * Smarter partition iterator without context limits. */ @@ -276,14 +222,14 @@ static inline void hd_sects_seq_init(struct hd_struct *p) struct disk_part_iter { struct gendisk *disk; - struct hd_struct *part; + struct block_device *part; int idx; unsigned int flags; }; extern void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, unsigned int flags); -extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +struct block_device *disk_part_iter_next(struct disk_part_iter *piter); extern void disk_part_iter_exit(struct disk_part_iter *piter); extern bool disk_has_partitions(struct gendisk *disk); @@ -301,22 +247,19 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk) } extern void del_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.policy; + return disk->part0->bd_read_only; } extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, - bool update_bdev); +bool set_capacity_and_notify(struct gendisk *disk, sector_t size); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; @@ -324,15 +267,17 @@ extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_part->start_sect; + return bdev->bd_start_sect; } + +static inline sector_t bdev_nr_sectors(struct block_device *bdev) +{ + return i_size_read(bdev->bd_inode) >> 9; +} + static inline sector_t get_capacity(struct gendisk *disk) { - return disk->part0.nr_sects; -} -static inline void set_capacity(struct gendisk *disk, sector_t size) -{ - disk->part0.nr_sects = size; + return bdev_nr_sectors(disk->part0); } int bdev_disk_changed(struct block_device *bdev, bool invalidate); @@ -340,15 +285,7 @@ int blk_add_partitions(struct gendisk *disk, struct block_device *bdev); int blk_drop_partitions(struct block_device *bdev); extern struct gendisk *__alloc_disk_node(int minors, int node_id); -extern struct kobject *get_disk_and_module(struct gendisk *disk); extern void put_disk(struct gendisk *disk); -extern void put_disk_and_module(struct gendisk *disk); -extern void blk_register_region(dev_t devt, unsigned long range, - struct module *module, - struct kobject *(*probe)(dev_t, int *, void *), - int (*lock)(dev_t, void *), - void *data); -extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk_node(minors, node_id) \ ({ \ @@ -368,13 +305,15 @@ extern void blk_unregister_region(dev_t devt, unsigned long range); #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) -int register_blkdev(unsigned int major, const char *name); +int __register_blkdev(unsigned int major, const char *name, + void (*probe)(dev_t devt)); +#define register_blkdev(major, name) \ + __register_blkdev(major, name, NULL) void unregister_blkdev(unsigned int major, const char *name); -void revalidate_disk_size(struct gendisk *disk, bool verbose); bool bdev_check_media_change(struct block_device *bdev); int __invalidate_device(struct block_device *bdev, bool kill_dirty); -void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); +void set_capacity(struct gendisk *disk, sector_t size); /* for drivers/char/raw.c: */ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); @@ -395,18 +334,16 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, } #endif /* CONFIG_SYSFS */ +extern struct rw_semaphore bdev_lookup_sem; + +dev_t blk_lookup_devt(const char *name, int partno); +void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK void printk_all_partitions(void); -dev_t blk_lookup_devt(const char *name, int partno); #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int partno) -{ - dev_t devt = MKDEV(0, 0); - return devt; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 1ce131f29f3b..5ddb479c4d4c 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -764,6 +764,23 @@ enum vmbus_device_type { HV_UNKNOWN, }; +/* + * Provides request ids for VMBus. Encapsulates guest memory + * addresses and stores the next available slot in req_arr + * to generate new ids in constant time. + */ +struct vmbus_requestor { + u64 *req_arr; + unsigned long *req_bitmap; /* is a given slot available? */ + u32 size; + u64 next_request_id; + spinlock_t req_lock; /* provides atomicity */ +}; + +#define VMBUS_NO_RQSTOR U64_MAX +#define VMBUS_RQST_ERROR (U64_MAX - 1) +#define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2) + struct vmbus_device { u16 dev_type; guid_t guid; @@ -988,8 +1005,14 @@ struct vmbus_channel { u32 fuzz_testing_interrupt_delay; u32 fuzz_testing_message_delay; + /* request/transaction ids for VMBus */ + struct vmbus_requestor requestor; + u32 rqstor_size; }; +u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr); +u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id); + static inline bool is_hvsock_channel(const struct vmbus_channel *c) { return !!(c->offermsg.offer.chn_flags & diff --git a/include/linux/ide.h b/include/linux/ide.h index 62653769509f..2c300689a51a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1493,9 +1493,6 @@ static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; } static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} #endif -void ide_register_region(struct gendisk *); -void ide_unregister_region(struct gendisk *); - void ide_check_nien_quirk_list(ide_drive_t *); void ide_undecoded_slave(ide_drive_t *); diff --git a/include/linux/ima.h b/include/linux/ima.h index 7233a2751754..7db9cca1af34 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -32,6 +32,12 @@ extern int ima_file_hash(struct file *file, char *buf, size_t buf_size); extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size); extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size); +#ifdef CONFIG_IMA_APPRAISE_BOOTPARAM +extern void ima_appraise_parse_cmdline(void); +#else +static inline void ima_appraise_parse_cmdline(void) {} +#endif + #ifdef CONFIG_IMA_KEXEC extern void ima_add_kexec_buffer(struct kimage *image); #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index acc0494cceba..7aaa753b8608 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -311,7 +311,7 @@ LSM_HOOK(int, 0, secmark_relabel_packet, u32 secid) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_inc, void) LSM_HOOK(void, LSM_RET_VOID, secmark_refcount_dec, void) LSM_HOOK(void, LSM_RET_VOID, req_classify_flow, const struct request_sock *req, - struct flowi *fl) + struct flowi_common *flic) LSM_HOOK(int, 0, tun_dev_alloc_security, void **security) LSM_HOOK(void, LSM_RET_VOID, tun_dev_free_security, void *security) LSM_HOOK(int, 0, tun_dev_create, void) @@ -351,7 +351,7 @@ LSM_HOOK(int, 0, xfrm_state_delete_security, struct xfrm_state *x) LSM_HOOK(int, 0, xfrm_policy_lookup, struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) LSM_HOOK(int, 1, xfrm_state_pol_flow_match, struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, const struct flowi_common *flic) LSM_HOOK(int, 0, xfrm_decode_session, struct sk_buff *skb, u32 *secid, int ckall) #endif /* CONFIG_SECURITY_NETWORK_XFRM */ diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c503f7ab8afb..a19adef1f088 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1105,7 +1105,7 @@ * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. + * @flic contains the flowi_common struct to check for a match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 24125778ef3e..d2558121d48c 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -25,26 +25,26 @@ struct disk_stats { #define part_stat_unlock() preempt_enable() #define part_stat_get_cpu(part, field, cpu) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field) + (per_cpu_ptr((part)->bd_stats, (cpu))->field) #define part_stat_get(part, field) \ part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ - typeof((part)->dkstats->field) res = 0; \ + typeof((part)->bd_stats->field) res = 0; \ unsigned int _cpu; \ for_each_possible_cpu(_cpu) \ - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ + res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ res; \ }) -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void part_stat_set_all(struct block_device *part, int value) { int i; for_each_possible_cpu(i) - memset(per_cpu_ptr(part->dkstats, i), value, + memset(per_cpu_ptr(part->bd_stats, i), value, sizeof(struct disk_stats)); } @@ -54,29 +54,28 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) part_stat_read(part, field[STAT_DISCARD])) #define __part_stat_add(part, field, addnd) \ - __this_cpu_add((part)->dkstats->field, addnd) + __this_cpu_add((part)->bd_stats->field, addnd) #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ - if ((part)->partno) \ - __part_stat_add(&part_to_disk((part))->part0, \ - field, addnd); \ + if ((part)->bd_partno) \ + __part_stat_add(bdev_whole(part), field, addnd); \ } while (0) -#define part_stat_dec(gendiskp, field) \ - part_stat_add(gendiskp, field, -1) -#define part_stat_inc(gendiskp, field) \ - part_stat_add(gendiskp, field, 1) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) +#define part_stat_dec(part, field) \ + part_stat_add(part, field, -1) +#define part_stat_inc(part, field) \ + part_stat_add(part, field, 1) +#define part_stat_sub(part, field, subnd) \ + part_stat_add(part, field, -subnd) -#define part_stat_local_dec(gendiskp, field) \ - local_dec(&(part_stat_get(gendiskp, field))) -#define part_stat_local_inc(gendiskp, field) \ - local_inc(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read(gendiskp, field) \ - local_read(&(part_stat_get(gendiskp, field))) -#define part_stat_local_read_cpu(gendiskp, field, cpu) \ - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) +#define part_stat_local_dec(part, field) \ + local_dec(&(part_stat_get(part, field))) +#define part_stat_local_inc(part, field) \ + local_inc(&(part_stat_get(part, field))) +#define part_stat_local_read(part, field) \ + local_read(&(part_stat_get(part, field))) +#define part_stat_local_read_cpu(part, field, cpu) \ + local_read(&(part_stat_get_cpu(part, field, cpu))) #endif /* _LINUX_PART_STAT_H */ diff --git a/include/linux/platform_data/jz4740/jz4740_nand.h b/include/linux/platform_data/jz4740/jz4740_nand.h deleted file mode 100644 index b3f066d63059..000000000000 --- a/include/linux/platform_data/jz4740/jz4740_nand.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009-2010, Lars-Peter Clausen - * JZ4740 SoC NAND controller driver - */ - -#ifndef __JZ4740_NAND_H__ -#define __JZ4740_NAND_H__ - -#include -#include - -#define JZ_NAND_NUM_BANKS 4 - -struct jz_nand_platform_data { - int num_partitions; - struct mtd_partition *partitions; - - unsigned char banks[JZ_NAND_NUM_BANKS]; - - void (*ident_callback)(struct platform_device *, struct mtd_info *, - struct mtd_partition **, int *num_partitions); -}; - -#endif diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h index 61e914522b01..99564f93d774 100644 --- a/include/linux/pstore_blk.h +++ b/include/linux/pstore_blk.h @@ -7,48 +7,6 @@ #include #include -/** - * typedef pstore_blk_panic_write_op - panic write operation to block device - * - * @buf: the data to write - * @start_sect: start sector to block device - * @sects: sectors count on buf - * - * Return: On success, zero should be returned. Others excluding -ENOMSG - * mean error. -ENOMSG means to try next zone. - * - * Panic write to block device must be aligned to SECTOR_SIZE. - */ -typedef int (*pstore_blk_panic_write_op)(const char *buf, sector_t start_sect, - sector_t sects); - -/** - * struct pstore_blk_info - pstore/blk registration details - * - * @major: Which major device number to support with pstore/blk - * @flags: The supported PSTORE_FLAGS_* from linux/pstore.h. - * @panic_write:The write operation only used for the panic case. - * This can be NULL, but is recommended to avoid losing - * crash data if the kernel's IO path or work queues are - * broken during a panic. - * @devt: The dev_t that pstore/blk has attached to. - * @nr_sects: Number of sectors on @devt. - * @start_sect: Starting sector on @devt. - */ -struct pstore_blk_info { - unsigned int major; - unsigned int flags; - pstore_blk_panic_write_op panic_write; - - /* Filled in by pstore/blk after registration. */ - dev_t devt; - sector_t nr_sects; - sector_t start_sect; -}; - -int register_pstore_blk(struct pstore_blk_info *info); -void unregister_pstore_blk(unsigned int major); - /** * struct pstore_device_info - back-end pstore/blk driver structure. * diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index e40d019c3d9d..74cc6384715e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -32,11 +32,6 @@ struct sbitmap_word { * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; - - /** - * @swap_lock: Held while swapping word <-> cleared - */ - spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index d2b4204ba4d3..fa067de9f1a9 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,7 +19,6 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ -#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -29,10 +28,9 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) -#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 4e116cd2e5a2..3f6a0fcaa10c 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -361,7 +361,6 @@ static inline int task_sigpending(struct task_struct *p) static inline int signal_pending(struct task_struct *p) { -#if defined(TIF_NOTIFY_SIGNAL) /* * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same * behavior in terms of ensuring that we break out of wait loops @@ -369,7 +368,6 @@ static inline int signal_pending(struct task_struct *p) */ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL))) return 1; -#endif return task_sigpending(p); } diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 47763f3999f7..0c564e5d40ff 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -121,4 +121,11 @@ static inline long seccomp_get_metadata(struct task_struct *task, return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ + +#ifdef CONFIG_SECCOMP_CACHE_DEBUG +struct seq_file; + +int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task); +#endif #endif /* _LINUX_SECCOMP_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 77c7fe58d8a0..c35ea0ffccd9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -168,7 +168,7 @@ struct sk_buff; struct sock; struct sockaddr; struct socket; -struct flowi; +struct flowi_common; struct dst_entry; struct xfrm_selector; struct xfrm_policy; @@ -1356,8 +1356,9 @@ int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u int security_sk_alloc(struct sock *sk, int family, gfp_t priority); void security_sk_free(struct sock *sk); void security_sk_clone(const struct sock *sk, struct sock *newsk); -void security_sk_classify_flow(struct sock *sk, struct flowi *fl); -void security_req_classify_flow(const struct request_sock *req, struct flowi *fl); +void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic); +void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic); void security_sock_graft(struct sock*sk, struct socket *parent); int security_inet_conn_request(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -1508,11 +1509,13 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +static inline void security_sk_classify_flow(struct sock *sk, + struct flowi_common *flic) { } -static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +static inline void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic) { } @@ -1639,9 +1642,9 @@ void security_xfrm_state_free(struct xfrm_state *x); int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const struct flowi_common *flic); int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid); -void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); +void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic); #else /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -1693,7 +1696,8 @@ static inline int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_s } static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, const struct flowi *fl) + struct xfrm_policy *xp, + const struct flowi_common *flic) { return 1; } @@ -1703,7 +1707,8 @@ static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) return 0; } -static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +static inline void security_skb_classify_flow(struct sk_buff *skb, + struct flowi_common *flic) { } diff --git a/include/linux/socket.h b/include/linux/socket.h index e9cb30d8cbfb..385894b4a8bb 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -436,6 +436,7 @@ extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_socketpair(int family, int type, int protocol, int __user *usockvec); +extern int __sys_shutdown_sock(struct socket *sock, int how); extern int __sys_shutdown(int fd, int how); extern struct ns_common *get_net_ns(struct ns_common *ns); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 17ac91876544..df0c3c74609e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -317,7 +317,7 @@ asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, u32 min_complete, u32 flags, - const sigset_t __user *sig, size_t sigsz); + const void __user *argp, size_t argsz); asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op, void __user *arg, unsigned int nr_args); diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 54b925224a13..3e80c4bc66f7 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -206,12 +206,10 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) */ static inline void tracehook_notify_signal(void) { -#if defined(TIF_NOTIFY_SIGNAL) clear_thread_flag(TIF_NOTIFY_SIGNAL); smp_mb__after_atomic(); if (current->task_works) task_work_run(); -#endif } /* @@ -219,11 +217,9 @@ static inline void tracehook_notify_signal(void) */ static inline void set_notify_signal(struct task_struct *task) { -#if defined(TIF_NOTIFY_SIGNAL) if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && !wake_up_state(task, TASK_INTERRUPTIBLE)) kick_process(task); -#endif } #endif /* */ diff --git a/include/net/flow.h b/include/net/flow.h index b2531df3f65f..39d0cedcddee 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -195,11 +195,21 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) return container_of(fl4, struct flowi, u.ip4); } +static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4) +{ + return &(flowi4_to_flowi(fl4)->u.__fl_common); +} + static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) { return container_of(fl6, struct flowi, u.ip6); } +static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) +{ + return &(flowi6_to_flowi(fl6)->u.__fl_common); +} + static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) { return container_of(fldn, struct flowi, u.dn); diff --git a/include/net/route.h b/include/net/route.h index ff021cab657e..2e6c0e153e3a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -165,7 +165,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -322,7 +322,7 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_rt_put(rt); flowi4_update_output(fl4, oif, tos, fl4->daddr, fl4->saddr); } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } @@ -338,7 +338,7 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable flowi4_update_output(fl4, sk->sk_bound_dev_if, RT_CONN_FLAGS(sk), fl4->daddr, fl4->saddr); - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } return rt; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 34d64ca306b1..0d782663a005 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -64,7 +64,6 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, /** * block_rq_requeue - place block IO request back on a queue - * @q: queue holding operation * @rq: block IO operation request * * The block operation request @rq is being placed back into queue @@ -73,9 +72,9 @@ DEFINE_EVENT(block_buffer, block_dirty_buffer, */ TRACE_EVENT(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -147,9 +146,9 @@ TRACE_EVENT(block_rq_complete, DECLARE_EVENT_CLASS(block_rq, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(rq), TP_STRUCT__entry( __field( dev_t, dev ) @@ -181,7 +180,6 @@ DECLARE_EVENT_CLASS(block_rq, /** * block_rq_insert - insert block operation request into queue - * @q: target queue * @rq: block IO operation request * * Called immediately before block operation request @rq is inserted @@ -191,14 +189,13 @@ DECLARE_EVENT_CLASS(block_rq, */ DEFINE_EVENT(block_rq, block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_issue - issue pending block IO request operation to device driver - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is sent to a @@ -206,14 +203,13 @@ DEFINE_EVENT(block_rq, block_rq_insert, */ DEFINE_EVENT(block_rq, block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) + TP_ARGS(rq) ); /** * block_rq_merge - merge request with another one in the elevator - * @q: queue holding operation * @rq: block IO operation operation request * * Called when block operation request @rq from queue @q is merged to another @@ -221,48 +217,9 @@ DEFINE_EVENT(block_rq, block_rq_issue, */ DEFINE_EVENT(block_rq, block_rq_merge, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request *rq), - TP_ARGS(q, rq) -); - -/** - * block_bio_bounce - used bounce buffer when processing block operation - * @q: queue holding the block operation - * @bio: block operation - * - * A bounce buffer was used to handle the block operation @bio in @q. - * This occurs when hardware limitations prevent a direct transfer of - * data between the @bio data memory area and the IO device. Use of a - * bounce buffer requires extra copying of data and decreases - * performance. - */ -TRACE_EVENT(block_bio_bounce, - - TP_PROTO(struct request_queue *q, struct bio *bio), - - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio_dev(bio); - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(rq) ); /** @@ -301,11 +258,11 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -DECLARE_EVENT_CLASS(block_bio_merge, +DECLARE_EVENT_CLASS(block_bio, - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), + TP_PROTO(struct bio *bio), - TP_ARGS(q, rq, bio), + TP_ARGS(bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -329,134 +286,63 @@ DECLARE_EVENT_CLASS(block_bio_merge, __entry->nr_sector, __entry->comm) ); +/** + * block_bio_bounce - used bounce buffer when processing block operation + * @bio: block operation + * + * A bounce buffer was used to handle the block operation @bio in @q. + * This occurs when hardware limitations prevent a direct transfer of + * data between the @bio data memory area and the IO device. Use of a + * bounce buffer requires extra copying of data and decreases + * performance. + */ +DEFINE_EVENT(block_bio, block_bio_bounce, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) +); + /** * block_bio_backmerge - merging block operation to the end of an existing operation - * @q: queue holding operation - * @rq: request bio is being merged into * @bio: new block operation to merge * - * Merging block request @bio to the end of an existing block request - * in queue @q. + * Merging block request @bio to the end of an existing block request. */ -DEFINE_EVENT(block_bio_merge, block_bio_backmerge, - - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - - TP_ARGS(q, rq, bio) +DEFINE_EVENT(block_bio, block_bio_backmerge, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation - * @q: queue holding operation - * @rq: request bio is being merged into * @bio: new block operation to merge * - * Merging block IO operation @bio to the beginning of an existing block - * operation in queue @q. + * Merging block IO operation @bio to the beginning of an existing block request. */ -DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, - - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - - TP_ARGS(q, rq, bio) +DEFINE_EVENT(block_bio, block_bio_frontmerge, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_bio_queue - putting new block IO operation in queue - * @q: queue holding operation * @bio: new block operation * * About to place the block IO operation @bio into queue @q. */ -TRACE_EVENT(block_bio_queue, - - TP_PROTO(struct request_queue *q, struct bio *bio), - - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio_dev(bio); - __entry->sector = bio->bi_iter.bi_sector; - __entry->nr_sector = bio_sectors(bio); - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) -); - -DECLARE_EVENT_CLASS(block_get_rq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, RWBS_LEN ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio ? bio_dev(bio) : 0; - __entry->sector = bio ? bio->bi_iter.bi_sector : 0; - __entry->nr_sector = bio ? bio_sectors(bio) : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_opf : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_bio, block_bio_queue, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** * block_getrq - get a free request entry in queue for block IO operations - * @q: queue for operations * @bio: pending block IO operation (can be %NULL) - * @rw: low bit indicates a read (%0) or a write (%1) * - * A request struct for queue @q has been allocated to handle the - * block IO operation @bio. + * A request struct has been allocated to handle the block IO operation @bio. */ -DEFINE_EVENT(block_get_rq, block_getrq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw) -); - -/** - * block_sleeprq - waiting to get a free request entry in queue for block IO operation - * @q: queue for operation - * @bio: pending block IO operation (can be %NULL) - * @rw: low bit indicates a read (%0) or a write (%1) - * - * In the case where a request struct cannot be provided for queue @q - * the process needs to wait for an request struct to become - * available. This tracepoint event is generated each time the - * process goes to sleep waiting for request struct become available. - */ -DEFINE_EVENT(block_get_rq, block_sleeprq, - - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - - TP_ARGS(q, bio, rw) +DEFINE_EVENT(block_bio, block_getrq, + TP_PROTO(struct bio *bio), + TP_ARGS(bio) ); /** @@ -521,21 +407,19 @@ DEFINE_EVENT(block_unplug, block_unplug, /** * block_split - split a single bio struct into two bio structs - * @q: queue containing the bio * @bio: block operation being split * @new_sector: The starting sector for the new bio * - * The bio request @bio in request queue @q needs to be split into two - * bio requests. The newly created @bio request starts at - * @new_sector. This split may be required due to hardware limitation - * such as operation crossing device boundaries in a RAID system. + * The bio request @bio needs to be split into two bio requests. The newly + * created @bio request starts at @new_sector. This split may be required due to + * hardware limitations such as operation crossing device boundaries in a RAID + * system. */ TRACE_EVENT(block_split, - TP_PROTO(struct request_queue *q, struct bio *bio, - unsigned int new_sector), + TP_PROTO(struct bio *bio, unsigned int new_sector), - TP_ARGS(q, bio, new_sector), + TP_ARGS(bio, new_sector), TP_STRUCT__entry( __field( dev_t, dev ) @@ -562,9 +446,8 @@ TRACE_EVENT(block_split, /** * block_bio_remap - map request for a logical device to the raw device - * @q: queue holding the operation * @bio: revised operation - * @dev: device for the operation + * @dev: original device for the operation * @from: original sector for the operation * * An operation for a logical device has been mapped to the @@ -572,10 +455,9 @@ TRACE_EVENT(block_split, */ TRACE_EVENT(block_bio_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from), + TP_PROTO(struct bio *bio, dev_t dev, sector_t from), - TP_ARGS(q, bio, dev, from), + TP_ARGS(bio, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) @@ -605,7 +487,6 @@ TRACE_EVENT(block_bio_remap, /** * block_rq_remap - map request for a block operation request - * @q: queue holding the operation * @rq: block IO operation request * @dev: device for the operation * @from: original sector for the operation @@ -616,10 +497,9 @@ TRACE_EVENT(block_bio_remap, */ TRACE_EVENT(block_rq_remap, - TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, - sector_t from), + TP_PROTO(struct request *rq, dev_t dev, sector_t from), - TP_ARGS(q, rq, dev, from), + TP_ARGS(rq, dev, from), TP_STRUCT__entry( __field( dev_t, dev ) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 98d8e06dea22..d31a2a1e8ef9 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -42,6 +42,8 @@ struct io_uring_sqe { __u32 statx_flags; __u32 fadvise_advice; __u32 splice_flags; + __u32 rename_flags; + __u32 unlink_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { @@ -132,6 +134,9 @@ enum { IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, IORING_OP_TEE, + IORING_OP_SHUTDOWN, + IORING_OP_RENAMEAT, + IORING_OP_UNLINKAT, /* this goes last, obviously */ IORING_OP_LAST, @@ -146,6 +151,7 @@ enum { * sqe->timeout_flags */ #define IORING_TIMEOUT_ABS (1U << 0) +#define IORING_TIMEOUT_UPDATE (1U << 1) /* * sqe->splice_flags @@ -226,6 +232,7 @@ struct io_cqring_offsets { #define IORING_ENTER_GETEVENTS (1U << 0) #define IORING_ENTER_SQ_WAKEUP (1U << 1) #define IORING_ENTER_SQ_WAIT (1U << 2) +#define IORING_ENTER_EXT_ARG (1U << 3) /* * Passed in for io_uring_setup(2). Copied back with updated info on success @@ -253,6 +260,8 @@ struct io_uring_params { #define IORING_FEAT_CUR_PERSONALITY (1U << 4) #define IORING_FEAT_FAST_POLL (1U << 5) #define IORING_FEAT_POLL_32BITS (1U << 6) +#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) +#define IORING_FEAT_EXT_ARG (1U << 8) /* * io_uring_register(2) opcodes and arguments @@ -329,4 +338,11 @@ enum { IORING_RESTRICTION_LAST }; +struct io_uring_getevents_arg { + __u64 sigmask; + __u32 sigmask_sz; + __u32 pad; + __u64 ts; +}; + #endif diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 5a8315e6d8a6..00c7235ae93e 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -61,6 +61,15 @@ struct xenbus_watch /* Path being watched. */ const char *node; + unsigned int nr_pending; + + /* + * Called just before enqueing new event while a spinlock is held. + * The event will be discarded if this callback returns false. + */ + bool (*will_handle)(struct xenbus_watch *, + const char *path, const char *token); + /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, const char *path, const char *token); @@ -197,10 +206,14 @@ void xenbus_probe(struct work_struct *); int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *)); -__printf(4, 5) +__printf(5, 6) int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *), const char *pathfmt, ...); diff --git a/init/do_mounts.c b/init/do_mounts.c index b5f9604d0c98..a78e44ee6adb 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -76,21 +76,15 @@ struct uuidcmp { */ static int match_dev_by_uuid(struct device *dev, const void *data) { + struct block_device *bdev = dev_to_bdev(dev); const struct uuidcmp *cmp = data; - struct hd_struct *part = dev_to_part(dev); - - if (!part->info) - goto no_match; - - if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len)) - goto no_match; + if (!bdev->bd_meta_info || + strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len)) + return 0; return 1; -no_match: - return 0; } - /** * devt_from_partuuid - looks up the dev_t of a partition by its UUID * @uuid_str: char array containing ascii UUID @@ -106,13 +100,10 @@ static int match_dev_by_uuid(struct device *dev, const void *data) */ static dev_t devt_from_partuuid(const char *uuid_str) { - dev_t res = 0; struct uuidcmp cmp; struct device *dev = NULL; - struct gendisk *disk; - struct hd_struct *part; + dev_t devt = 0; int offset = 0; - bool clear_root_wait = false; char *slash; cmp.uuid = uuid_str; @@ -121,52 +112,49 @@ static dev_t devt_from_partuuid(const char *uuid_str) /* Check for optional partition number offset attributes. */ if (slash) { char c = 0; + /* Explicitly fail on poor PARTUUID syntax. */ - if (sscanf(slash + 1, - "PARTNROFF=%d%c", &offset, &c) != 1) { - clear_root_wait = true; - goto done; - } + if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1) + goto clear_root_wait; cmp.len = slash - uuid_str; } else { cmp.len = strlen(uuid_str); } - if (!cmp.len) { - clear_root_wait = true; - goto done; - } + if (!cmp.len) + goto clear_root_wait; - dev = class_find_device(&block_class, NULL, &cmp, - &match_dev_by_uuid); + dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid); if (!dev) - goto done; + return 0; - res = dev->devt; + if (offset) { + /* + * Attempt to find the requested partition by adding an offset + * to the partition number found by UUID. + */ + struct block_device *part; - /* Attempt to find the partition by offset. */ - if (!offset) - goto no_offset; - - res = 0; - disk = part_to_disk(dev_to_part(dev)); - part = disk_get_part(disk, dev_to_part(dev)->partno + offset); - if (part) { - res = part_devt(part); - put_device(part_to_dev(part)); + part = bdget_disk(dev_to_disk(dev), + dev_to_bdev(dev)->bd_partno + offset); + if (part) { + devt = part->bd_dev; + bdput(part); + } + } else { + devt = dev->devt; } -no_offset: put_device(dev); -done: - if (clear_root_wait) { - pr_err("VFS: PARTUUID= is invalid.\n" - "Expected PARTUUID=[/PARTNROFF=%%d]\n"); - if (root_wait) - pr_err("Disabling rootwait; root= is invalid.\n"); - root_wait = 0; - } - return res; + return devt; + +clear_root_wait: + pr_err("VFS: PARTUUID= is invalid.\n" + "Expected PARTUUID=[/PARTNROFF=%%d]\n"); + if (root_wait) + pr_err("Disabling rootwait; root= is invalid.\n"); + root_wait = 0; + return 0; } /** @@ -178,15 +166,90 @@ static dev_t devt_from_partuuid(const char *uuid_str) */ static int match_dev_by_label(struct device *dev, const void *data) { + struct block_device *bdev = dev_to_bdev(dev); const char *label = data; - struct hd_struct *part = dev_to_part(dev); - if (part->info && !strcmp(label, part->info->volname)) - return 1; - - return 0; + if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname)) + return 0; + return 1; +} + +static dev_t devt_from_partlabel(const char *label) +{ + struct device *dev; + dev_t devt = 0; + + dev = class_find_device(&block_class, NULL, label, &match_dev_by_label); + if (dev) { + devt = dev->devt; + put_device(dev); + } + + return devt; +} + +static dev_t devt_from_devname(const char *name) +{ + dev_t devt = 0; + int part; + char s[32]; + char *p; + + if (strlen(name) > 31) + return 0; + strcpy(s, name); + for (p = s; *p; p++) { + if (*p == '/') + *p = '!'; + } + + devt = blk_lookup_devt(s, 0); + if (devt) + return devt; + + /* + * Try non-existent, but valid partition, which may only exist after + * opening the device, like partitioned md devices. + */ + while (p > s && isdigit(p[-1])) + p--; + if (p == s || !*p || *p == '0') + return 0; + + /* try disk name without */ + part = simple_strtoul(p, NULL, 10); + *p = '\0'; + devt = blk_lookup_devt(s, part); + if (devt) + return devt; + + /* try disk name without p */ + if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') + return 0; + p[-1] = '\0'; + return blk_lookup_devt(s, part); +} +#endif /* CONFIG_BLOCK */ + +static dev_t devt_from_devnum(const char *name) +{ + unsigned maj, min, offset; + dev_t devt = 0; + char *p, dummy; + + if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || + sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { + devt = MKDEV(maj, min); + if (maj != MAJOR(devt) || min != MINOR(devt)) + return 0; + } else { + devt = new_decode_dev(simple_strtoul(name, &p, 16)); + if (*p) + return 0; + } + + return devt; } -#endif /* * Convert a name into device number. We accept the following variants: @@ -218,101 +281,23 @@ static int match_dev_by_label(struct device *dev, const void *data) * name contains slashes, the device name has them replaced with * bangs. */ - dev_t name_to_dev_t(const char *name) { - char s[32]; - char *p; - dev_t res = 0; - int part; - + if (strcmp(name, "/dev/nfs") == 0) + return Root_NFS; + if (strcmp(name, "/dev/cifs") == 0) + return Root_CIFS; + if (strcmp(name, "/dev/ram") == 0) + return Root_RAM0; #ifdef CONFIG_BLOCK - if (strncmp(name, "PARTUUID=", 9) == 0) { - name += 9; - res = devt_from_partuuid(name); - if (!res) - goto fail; - goto done; - } else if (strncmp(name, "PARTLABEL=", 10) == 0) { - struct device *dev; - - dev = class_find_device(&block_class, NULL, name + 10, - &match_dev_by_label); - if (!dev) - goto fail; - - res = dev->devt; - put_device(dev); - goto done; - } + if (strncmp(name, "PARTUUID=", 9) == 0) + return devt_from_partuuid(name + 9); + if (strncmp(name, "PARTLABEL=", 10) == 0) + return devt_from_partlabel(name + 10); + if (strncmp(name, "/dev/", 5) == 0) + return devt_from_devname(name + 5); #endif - - if (strncmp(name, "/dev/", 5) != 0) { - unsigned maj, min, offset; - char dummy; - - if ((sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2) || - (sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3)) { - res = MKDEV(maj, min); - if (maj != MAJOR(res) || min != MINOR(res)) - goto fail; - } else { - res = new_decode_dev(simple_strtoul(name, &p, 16)); - if (*p) - goto fail; - } - goto done; - } - - name += 5; - res = Root_NFS; - if (strcmp(name, "nfs") == 0) - goto done; - res = Root_CIFS; - if (strcmp(name, "cifs") == 0) - goto done; - res = Root_RAM0; - if (strcmp(name, "ram") == 0) - goto done; - - if (strlen(name) > 31) - goto fail; - strcpy(s, name); - for (p = s; *p; p++) - if (*p == '/') - *p = '!'; - res = blk_lookup_devt(s, 0); - if (res) - goto done; - - /* - * try non-existent, but valid partition, which may only exist - * after revalidating the disk, like partitioned md devices - */ - while (p > s && isdigit(p[-1])) - p--; - if (p == s || !*p || *p == '0') - goto fail; - - /* try disk name without */ - part = simple_strtoul(p, NULL, 10); - *p = '\0'; - res = blk_lookup_devt(s, part); - if (res) - goto done; - - /* try disk name without p */ - if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') - goto fail; - p[-1] = '\0'; - res = blk_lookup_devt(s, part); - if (res) - goto done; - -fail: - return 0; -done: - return res; + return devt_from_devnum(name); } EXPORT_SYMBOL_GPL(name_to_dev_t); diff --git a/kernel/audit.c b/kernel/audit.c index 68cee3bc8cfe..1ffc2e059027 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -67,7 +67,7 @@ #define AUDIT_DISABLED -1 #define AUDIT_UNINITIALIZED 0 #define AUDIT_INITIALIZED 1 -static int audit_initialized; +static int audit_initialized = AUDIT_UNINITIALIZED; u32 audit_enabled = AUDIT_OFF; bool audit_ever_enabled = !!AUDIT_OFF; @@ -523,7 +523,7 @@ static int auditd_set(struct pid *pid, u32 portid, struct net *net) } /** - * kauditd_print_skb - Print the audit record to the ring buffer + * kauditd_printk_skb - Print the audit record to the ring buffer * @skb: audit record * * Whatever the reason, this packet may not make it to the auditd connection @@ -1779,7 +1779,7 @@ unsigned int audit_serial(void) { static atomic_t serial = ATOMIC_INIT(0); - return atomic_add_return(1, &serial); + return atomic_inc_return(&serial); } static inline void audit_get_stamp(struct audit_context *ctx, @@ -1865,6 +1865,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, } audit_get_stamp(ab->ctx, &t, &serial); + /* cancel dummy context to enable supporting records */ + if (ctx) + ctx->dummy = 0; audit_log_format(ab, "audit(%llu.%03lu:%u): ", (unsigned long long)t.tv_sec, t.tv_nsec/1000000, serial); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index c00aa5837965..ce8c9e2279ba 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -102,8 +102,6 @@ struct audit_aux_data { int type; }; -#define AUDIT_AUX_IPCPERM 0 - /* Number of target pids per aux struct. */ #define AUDIT_AUX_PIDS 16 @@ -552,11 +550,11 @@ static int audit_filter_rules(struct task_struct *tsk, break; case AUDIT_EXIT: - if (ctx && ctx->return_valid) + if (ctx && ctx->return_valid != AUDITSC_INVALID) result = audit_comparator(ctx->return_code, f->op, f->val); break; case AUDIT_SUCCESS: - if (ctx && ctx->return_valid) { + if (ctx && ctx->return_valid != AUDITSC_INVALID) { if (f->val) result = audit_comparator(ctx->return_valid, f->op, AUDITSC_SUCCESS); else @@ -929,6 +927,8 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state) context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0; INIT_LIST_HEAD(&context->killed_trees); INIT_LIST_HEAD(&context->names_list); + context->fds[0] = -1; + context->return_valid = AUDITSC_INVALID; return context; } @@ -1367,7 +1367,10 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, /* name was specified as a relative path and the * directory component is the cwd */ - audit_log_d_path(ab, " name=", &context->pwd); + if (context->pwd.dentry && context->pwd.mnt) + audit_log_d_path(ab, " name=", &context->pwd); + else + audit_log_format(ab, " name=(null)"); break; default: /* log the name's directory component */ @@ -1435,9 +1438,6 @@ static void audit_log_proctitle(void) struct audit_context *context = audit_context(); struct audit_buffer *ab; - if (!context || context->dummy) - return; - ab = audit_log_start(context, GFP_KERNEL, AUDIT_PROCTITLE); if (!ab) return; /* audit_panic or being filtered */ @@ -1487,7 +1487,7 @@ static void audit_log_exit(void) context->arch, context->major); if (context->personality != PER_LINUX) audit_log_format(ab, " per=%lx", context->personality); - if (context->return_valid) + if (context->return_valid != AUDITSC_INVALID) audit_log_format(ab, " success=%s exit=%ld", (context->return_valid==AUDITSC_SUCCESS)?"yes":"no", context->return_code); @@ -1624,7 +1624,7 @@ void __audit_free(struct task_struct *tsk) * need to log via audit_log_exit(). */ if (tsk == current && !context->dummy && context->in_syscall) { - context->return_valid = 0; + context->return_valid = AUDITSC_INVALID; context->return_code = 0; audit_filter_syscall(tsk, context, @@ -1866,6 +1866,8 @@ static struct audit_names *audit_alloc_name(struct audit_context *context, list_add_tail(&aname->list, &context->names_list); context->name_count++; + if (!context->pwd.dentry) + get_fs_pwd(current->fs, &context->pwd); return aname; } @@ -1894,20 +1896,6 @@ __audit_reusename(const __user char *uptr) return NULL; } -inline void _audit_getcwd(struct audit_context *context) -{ - if (!context->pwd.dentry) - get_fs_pwd(current->fs, &context->pwd); -} - -void __audit_getcwd(void) -{ - struct audit_context *context = audit_context(); - - if (context->in_syscall) - _audit_getcwd(context); -} - /** * __audit_getname - add a name to the list * @name: name to add @@ -1931,8 +1919,6 @@ void __audit_getname(struct filename *name) n->name_len = AUDIT_NAME_FULL; name->aname = n; name->refcnt++; - - _audit_getcwd(context); } static inline int audit_copy_fcaps(struct audit_names *name, diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 15f47fc11d13..952dc1c90229 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -143,6 +143,38 @@ struct notification { struct list_head notifications; }; +#ifdef SECCOMP_ARCH_NATIVE +/** + * struct action_cache - per-filter cache of seccomp actions per + * arch/syscall pair + * + * @allow_native: A bitmap where each bit represents whether the + * filter will always allow the syscall, for the + * native architecture. + * @allow_compat: A bitmap where each bit represents whether the + * filter will always allow the syscall, for the + * compat architecture. + */ +struct action_cache { + DECLARE_BITMAP(allow_native, SECCOMP_ARCH_NATIVE_NR); +#ifdef SECCOMP_ARCH_COMPAT + DECLARE_BITMAP(allow_compat, SECCOMP_ARCH_COMPAT_NR); +#endif +}; +#else +struct action_cache { }; + +static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter, + const struct seccomp_data *sd) +{ + return false; +} + +static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter) +{ +} +#endif /* SECCOMP_ARCH_NATIVE */ + /** * struct seccomp_filter - container for seccomp BPF programs * @@ -159,6 +191,7 @@ struct notification { * this filter after reaching 0. The @users count is always smaller * or equal to @refs. Hence, reaching 0 for @users does not mean * the filter can be freed. + * @cache: cache of arch/syscall mappings to actions * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged * @prev: points to a previously installed, or inherited, filter * @prog: the BPF program to evaluate @@ -180,6 +213,7 @@ struct seccomp_filter { refcount_t refs; refcount_t users; bool log; + struct action_cache cache; struct seccomp_filter *prev; struct bpf_prog *prog; struct notification *notif; @@ -298,6 +332,52 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) return 0; } +#ifdef SECCOMP_ARCH_NATIVE +static inline bool seccomp_cache_check_allow_bitmap(const void *bitmap, + size_t bitmap_size, + int syscall_nr) +{ + if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size)) + return false; + syscall_nr = array_index_nospec(syscall_nr, bitmap_size); + + return test_bit(syscall_nr, bitmap); +} + +/** + * seccomp_cache_check_allow - lookup seccomp cache + * @sfilter: The seccomp filter + * @sd: The seccomp data to lookup the cache with + * + * Returns true if the seccomp_data is cached and allowed. + */ +static inline bool seccomp_cache_check_allow(const struct seccomp_filter *sfilter, + const struct seccomp_data *sd) +{ + int syscall_nr = sd->nr; + const struct action_cache *cache = &sfilter->cache; + +#ifndef SECCOMP_ARCH_COMPAT + /* A native-only architecture doesn't need to check sd->arch. */ + return seccomp_cache_check_allow_bitmap(cache->allow_native, + SECCOMP_ARCH_NATIVE_NR, + syscall_nr); +#else + if (likely(sd->arch == SECCOMP_ARCH_NATIVE)) + return seccomp_cache_check_allow_bitmap(cache->allow_native, + SECCOMP_ARCH_NATIVE_NR, + syscall_nr); + if (likely(sd->arch == SECCOMP_ARCH_COMPAT)) + return seccomp_cache_check_allow_bitmap(cache->allow_compat, + SECCOMP_ARCH_COMPAT_NR, + syscall_nr); +#endif /* SECCOMP_ARCH_COMPAT */ + + WARN_ON_ONCE(true); + return false; +} +#endif /* SECCOMP_ARCH_NATIVE */ + /** * seccomp_run_filters - evaluates all seccomp filters against @sd * @sd: optional seccomp data to be passed to filters @@ -320,6 +400,9 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, if (WARN_ON(f == NULL)) return SECCOMP_RET_KILL_PROCESS; + if (seccomp_cache_check_allow(f, sd)) + return SECCOMP_RET_ALLOW; + /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). @@ -470,6 +553,9 @@ void seccomp_filter_release(struct task_struct *tsk) { struct seccomp_filter *orig = tsk->seccomp.filter; + /* We are effectively holding the siglock by not having any sighand. */ + WARN_ON(tsk->sighand != NULL); + /* Detach task from its filter tree. */ tsk->seccomp.filter = NULL; __seccomp_filter_release(orig); @@ -544,7 +630,12 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) { struct seccomp_filter *sfilter; int ret; - const bool save_orig = IS_ENABLED(CONFIG_CHECKPOINT_RESTORE); + const bool save_orig = +#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(SECCOMP_ARCH_NATIVE) + true; +#else + false; +#endif if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) return ERR_PTR(-EINVAL); @@ -609,6 +700,148 @@ seccomp_prepare_user_filter(const char __user *user_filter) return filter; } +#ifdef SECCOMP_ARCH_NATIVE +/** + * seccomp_is_const_allow - check if filter is constant allow with given data + * @fprog: The BPF programs + * @sd: The seccomp data to check against, only syscall number and arch + * number are considered constant. + */ +static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog, + struct seccomp_data *sd) +{ + unsigned int reg_value = 0; + unsigned int pc; + bool op_res; + + if (WARN_ON_ONCE(!fprog)) + return false; + + for (pc = 0; pc < fprog->len; pc++) { + struct sock_filter *insn = &fprog->filter[pc]; + u16 code = insn->code; + u32 k = insn->k; + + switch (code) { + case BPF_LD | BPF_W | BPF_ABS: + switch (k) { + case offsetof(struct seccomp_data, nr): + reg_value = sd->nr; + break; + case offsetof(struct seccomp_data, arch): + reg_value = sd->arch; + break; + default: + /* can't optimize (non-constant value load) */ + return false; + } + break; + case BPF_RET | BPF_K: + /* reached return with constant values only, check allow */ + return k == SECCOMP_RET_ALLOW; + case BPF_JMP | BPF_JA: + pc += insn->k; + break; + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + switch (BPF_OP(code)) { + case BPF_JEQ: + op_res = reg_value == k; + break; + case BPF_JGE: + op_res = reg_value >= k; + break; + case BPF_JGT: + op_res = reg_value > k; + break; + case BPF_JSET: + op_res = !!(reg_value & k); + break; + default: + /* can't optimize (unknown jump) */ + return false; + } + + pc += op_res ? insn->jt : insn->jf; + break; + case BPF_ALU | BPF_AND | BPF_K: + reg_value &= k; + break; + default: + /* can't optimize (unknown insn) */ + return false; + } + } + + /* ran off the end of the filter?! */ + WARN_ON(1); + return false; +} + +static void seccomp_cache_prepare_bitmap(struct seccomp_filter *sfilter, + void *bitmap, const void *bitmap_prev, + size_t bitmap_size, int arch) +{ + struct sock_fprog_kern *fprog = sfilter->prog->orig_prog; + struct seccomp_data sd; + int nr; + + if (bitmap_prev) { + /* The new filter must be as restrictive as the last. */ + bitmap_copy(bitmap, bitmap_prev, bitmap_size); + } else { + /* Before any filters, all syscalls are always allowed. */ + bitmap_fill(bitmap, bitmap_size); + } + + for (nr = 0; nr < bitmap_size; nr++) { + /* No bitmap change: not a cacheable action. */ + if (!test_bit(nr, bitmap)) + continue; + + sd.nr = nr; + sd.arch = arch; + + /* No bitmap change: continue to always allow. */ + if (seccomp_is_const_allow(fprog, &sd)) + continue; + + /* + * Not a cacheable action: always run filters. + * atomic clear_bit() not needed, filter not visible yet. + */ + __clear_bit(nr, bitmap); + } +} + +/** + * seccomp_cache_prepare - emulate the filter to find cachable syscalls + * @sfilter: The seccomp filter + * + * Returns 0 if successful or -errno if error occurred. + */ +static void seccomp_cache_prepare(struct seccomp_filter *sfilter) +{ + struct action_cache *cache = &sfilter->cache; + const struct action_cache *cache_prev = + sfilter->prev ? &sfilter->prev->cache : NULL; + + seccomp_cache_prepare_bitmap(sfilter, cache->allow_native, + cache_prev ? cache_prev->allow_native : NULL, + SECCOMP_ARCH_NATIVE_NR, + SECCOMP_ARCH_NATIVE); + +#ifdef SECCOMP_ARCH_COMPAT + seccomp_cache_prepare_bitmap(sfilter, cache->allow_compat, + cache_prev ? cache_prev->allow_compat : NULL, + SECCOMP_ARCH_COMPAT_NR, + SECCOMP_ARCH_COMPAT); +#endif /* SECCOMP_ARCH_COMPAT */ +} +#endif /* SECCOMP_ARCH_NATIVE */ + /** * seccomp_attach_filter: validate and attach filter * @flags: flags to change filter behavior @@ -658,6 +891,7 @@ static long seccomp_attach_filter(unsigned int flags, * task reference. */ filter->prev = current->seccomp.filter; + seccomp_cache_prepare(filter); current->seccomp.filter = filter; atomic_inc(¤t->seccomp.filter_count); @@ -1967,7 +2201,7 @@ static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names) return true; } -static int read_actions_logged(struct ctl_table *ro_table, void __user *buffer, +static int read_actions_logged(struct ctl_table *ro_table, void *buffer, size_t *lenp, loff_t *ppos) { char names[sizeof(seccomp_actions_avail)]; @@ -1985,7 +2219,7 @@ static int read_actions_logged(struct ctl_table *ro_table, void __user *buffer, return proc_dostring(&table, 0, buffer, lenp, ppos); } -static int write_actions_logged(struct ctl_table *ro_table, void __user *buffer, +static int write_actions_logged(struct ctl_table *ro_table, void *buffer, size_t *lenp, loff_t *ppos, u32 *actions_logged) { char names[sizeof(seccomp_actions_avail)]; @@ -2103,3 +2337,59 @@ static int __init seccomp_sysctl_init(void) device_initcall(seccomp_sysctl_init) #endif /* CONFIG_SYSCTL */ + +#ifdef CONFIG_SECCOMP_CACHE_DEBUG +/* Currently CONFIG_SECCOMP_CACHE_DEBUG implies SECCOMP_ARCH_NATIVE */ +static void proc_pid_seccomp_cache_arch(struct seq_file *m, const char *name, + const void *bitmap, size_t bitmap_size) +{ + int nr; + + for (nr = 0; nr < bitmap_size; nr++) { + bool cached = test_bit(nr, bitmap); + char *status = cached ? "ALLOW" : "FILTER"; + + seq_printf(m, "%s %d %s\n", name, nr, status); + } +} + +int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct seccomp_filter *f; + unsigned long flags; + + /* + * We don't want some sandboxed process to know what their seccomp + * filters consist of. + */ + if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) + return -EACCES; + + if (!lock_task_sighand(task, &flags)) + return -ESRCH; + + f = READ_ONCE(task->seccomp.filter); + if (!f) { + unlock_task_sighand(task, &flags); + return 0; + } + + /* prevent filter from being freed while we are printing it */ + __get_seccomp_filter(f); + unlock_task_sighand(task, &flags); + + proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_NATIVE_NAME, + f->cache.allow_native, + SECCOMP_ARCH_NATIVE_NR); + +#ifdef SECCOMP_ARCH_COMPAT + proc_pid_seccomp_cache_arch(m, SECCOMP_ARCH_COMPAT_NAME, + f->cache.allow_compat, + SECCOMP_ARCH_COMPAT_NR); +#endif /* SECCOMP_ARCH_COMPAT */ + + __put_seccomp_filter(f); + return 0; +} +#endif /* CONFIG_SECCOMP_CACHE_DEBUG */ diff --git a/kernel/signal.c b/kernel/signal.c index e7defcb176e0..98dfdc4f43bf 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2556,14 +2556,12 @@ bool get_signal(struct ksignal *ksig) * that the arch handlers don't all have to do it. If we get here * without TIF_SIGPENDING, just exit after running signal work. */ -#ifdef TIF_NOTIFY_SIGNAL if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) { if (test_thread_flag(TIF_NOTIFY_SIGNAL)) tracehook_notify_signal(); if (!task_sigpending(current)) return false; } -#endif if (unlikely(uprobe_deny_signal())) return false; @@ -2577,26 +2575,6 @@ bool get_signal(struct ksignal *ksig) relock: spin_lock_irq(&sighand->siglock); - /* - * Make sure we can safely read ->jobctl() in task_work add. As Oleg - * states: - * - * It pairs with mb (implied by cmpxchg) before READ_ONCE. So we - * roughly have - * - * task_work_add: get_signal: - * STORE(task->task_works, new_work); STORE(task->jobctl); - * mb(); mb(); - * LOAD(task->jobctl); LOAD(task->task_works); - * - * and we can rely on STORE-MB-LOAD [ in task_work_add]. - */ - smp_store_mb(current->jobctl, current->jobctl & ~JOBCTL_TASK_WORK); - if (unlikely(current->task_works)) { - spin_unlock_irq(&sighand->siglock); - task_work_run(); - goto relock; - } /* * Every stopped thread goes here after wakeup. Check to see if diff --git a/kernel/task_work.c b/kernel/task_work.c index 15b087286bea..9cde961875c0 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -5,34 +5,6 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ -/* - * TWA_SIGNAL signaling - use TIF_NOTIFY_SIGNAL, if available, as it's faster - * than TIF_SIGPENDING as there's no dependency on ->sighand. The latter is - * shared for threads, and can cause contention on sighand->lock. Even for - * the non-threaded case TIF_NOTIFY_SIGNAL is more efficient, as no locking - * or IRQ disabling is involved for notification (or running) purposes. - */ -static void task_work_notify_signal(struct task_struct *task) -{ -#if defined(TIF_NOTIFY_SIGNAL) - set_notify_signal(task); -#else - unsigned long flags; - - /* - * Only grab the sighand lock if we don't already have some - * task_work pending. This pairs with the smp_store_mb() - * in get_signal(), see comment there. - */ - if (!(READ_ONCE(task->jobctl) & JOBCTL_TASK_WORK) && - lock_task_sighand(task, &flags)) { - task->jobctl |= JOBCTL_TASK_WORK; - signal_wake_up(task, 0); - unlock_task_sighand(task, &flags); - } -#endif -} - /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback @@ -76,7 +48,7 @@ int task_work_add(struct task_struct *task, struct callback_head *work, set_notify_resume(task); break; case TWA_SIGNAL: - task_work_notify_signal(task); + set_notify_signal(task); break; default: WARN_ON_ONCE(1); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b5c4b9ade960..456fe4ce6942 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -458,14 +458,9 @@ static const struct rchan_callbacks blk_relay_callbacks = { static void blk_trace_setup_lba(struct blk_trace *bt, struct block_device *bdev) { - struct hd_struct *part = NULL; - - if (bdev) - part = bdev->bd_part; - - if (part) { - bt->start_lba = part->start_sect; - bt->end_lba = part->start_sect + part->nr_sects; + if (bdev) { + bt->start_lba = bdev->bd_start_sect; + bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev); } else { bt->start_lba = 0; bt->end_lba = -1ULL; @@ -800,12 +795,12 @@ static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) #endif static u64 -blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) +blk_trace_request_get_cgid(struct request *rq) { if (!rq->bio) return 0; /* Use the first bio */ - return blk_trace_bio_get_cgid(q, rq->bio); + return blk_trace_bio_get_cgid(rq->q, rq->bio); } /* @@ -846,40 +841,35 @@ static void blk_add_trace_rq(struct request *rq, int error, rcu_read_unlock(); } -static void blk_add_trace_rq_insert(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_insert(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_issue(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_issue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_merge(void *ignore, - struct request_queue *q, struct request *rq) +static void blk_add_trace_rq_merge(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } -static void blk_add_trace_rq_requeue(void *ignore, - struct request_queue *q, - struct request *rq) +static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_complete(void *ignore, struct request *rq, int error, unsigned int nr_bytes) { blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, - blk_trace_request_get_cgid(rq->q, rq)); + blk_trace_request_get_cgid(rq)); } /** @@ -911,10 +901,9 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, rcu_read_unlock(); } -static void blk_add_trace_bio_bounce(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BOUNCE, 0); } static void blk_add_trace_bio_complete(void *ignore, @@ -924,63 +913,24 @@ static void blk_add_trace_bio_complete(void *ignore, blk_status_to_errno(bio->bi_status)); } -static void blk_add_trace_bio_backmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) +static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BACKMERGE, 0); } -static void blk_add_trace_bio_frontmerge(void *ignore, - struct request_queue *q, - struct request *rq, - struct bio *bio) +static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_FRONTMERGE, 0); } -static void blk_add_trace_bio_queue(void *ignore, - struct request_queue *q, struct bio *bio) +static void blk_add_trace_bio_queue(void *ignore, struct bio *bio) { - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_QUEUE, 0); } -static void blk_add_trace_getrq(void *ignore, - struct request_queue *q, - struct bio *bio, int rw) +static void blk_add_trace_getrq(void *ignore, struct bio *bio) { - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); - else { - struct blk_trace *bt; - - rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); - if (bt) - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, - NULL, 0); - rcu_read_unlock(); - } -} - - -static void blk_add_trace_sleeprq(void *ignore, - struct request_queue *q, - struct bio *bio, int rw) -{ - if (bio) - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); - else { - struct blk_trace *bt; - - rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); - if (bt) - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, - 0, 0, NULL, 0); - rcu_read_unlock(); - } + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_GETRQ, 0); } static void blk_add_trace_plug(void *ignore, struct request_queue *q) @@ -1015,10 +965,9 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, rcu_read_unlock(); } -static void blk_add_trace_split(void *ignore, - struct request_queue *q, struct bio *bio, - unsigned int pdu) +static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) { + struct request_queue *q = bio->bi_disk->queue; struct blk_trace *bt; rcu_read_lock(); @@ -1039,20 +988,16 @@ static void blk_add_trace_split(void *ignore, /** * blk_add_trace_bio_remap - Add a trace for a bio-remap operation * @ignore: trace callback data parameter (not used) - * @q: queue the io is for * @bio: the source bio - * @dev: target device + * @dev: source device * @from: source sector * - * Description: - * Device mapper or raid target sometimes need to split a bio because - * it spans a stripe (or similar). Add a trace for that action. - * + * Called after a bio is remapped to a different device and/or sector. **/ -static void blk_add_trace_bio_remap(void *ignore, - struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from) +static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, + sector_t from) { + struct request_queue *q = bio->bi_disk->queue; struct blk_trace *bt; struct blk_io_trace_remap r; @@ -1077,7 +1022,6 @@ static void blk_add_trace_bio_remap(void *ignore, /** * blk_add_trace_rq_remap - Add a trace for a request-remap operation * @ignore: trace callback data parameter (not used) - * @q: queue the io is for * @rq: the source request * @dev: target device * @from: source sector @@ -1087,16 +1031,14 @@ static void blk_add_trace_bio_remap(void *ignore, * Add a trace for that action. * **/ -static void blk_add_trace_rq_remap(void *ignore, - struct request_queue *q, - struct request *rq, dev_t dev, +static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { struct blk_trace *bt; struct blk_io_trace_remap r; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1108,13 +1050,12 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, - sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } /** * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for * @rq: io request * @data: driver-specific data * @len: length of driver-specific data @@ -1123,14 +1064,12 @@ static void blk_add_trace_rq_remap(void *ignore, * Some drivers might want to write driver-specific data per request. * **/ -void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) +void blk_add_driver_data(struct request *rq, void *data, size_t len) { struct blk_trace *bt; rcu_read_lock(); - bt = rcu_dereference(q->blk_trace); + bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; @@ -1138,7 +1077,7 @@ void blk_add_driver_data(struct request_queue *q, __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, - blk_trace_request_get_cgid(q, rq)); + blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1169,8 +1108,6 @@ static void blk_register_tracepoints(void) WARN_ON(ret); ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); WARN_ON(ret); - ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); - WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); @@ -1190,7 +1127,6 @@ static void blk_unregister_tracepoints(void) unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug(blk_add_trace_unplug, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); - unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); @@ -1815,30 +1751,15 @@ static ssize_t blk_trace_mask2str(char *buf, int mask) return p - buf; } -static struct request_queue *blk_trace_get_queue(struct block_device *bdev) -{ - if (bdev->bd_disk == NULL) - return NULL; - - return bdev_get_queue(bdev); -} - static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct block_device *bdev = bdget_part(dev_to_part(dev)); - struct request_queue *q; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; ssize_t ret = -ENXIO; - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; - mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, @@ -1861,9 +1782,6 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); -out_bdput: - bdput(bdev); -out: return ret; } @@ -1871,8 +1789,8 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct block_device *bdev; - struct request_queue *q; + struct block_device *bdev = dev_to_bdev(dev); + struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1888,17 +1806,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; value = ret; } - } else if (kstrtoull(buf, 0, &value)) - goto out; - - ret = -ENXIO; - bdev = bdget_part(dev_to_part(dev)); - if (bdev == NULL) - goto out; - - q = blk_trace_get_queue(bdev); - if (q == NULL) - goto out_bdput; + } else { + if (kstrtoull(buf, 0, &value)) + goto out; + } mutex_lock(&q->debugfs_mutex); @@ -1936,8 +1847,6 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); -out_bdput: - bdput(bdev); out: return ret ? ret : count; } diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 267aa7709416..d693d9213ceb 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -12,33 +12,24 @@ /* * See if we have deferred clears that we can batch move */ -static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) +static inline bool sbitmap_deferred_clear(struct sbitmap_word *map) { - unsigned long mask, val; - bool ret = false; - unsigned long flags; + unsigned long mask; - spin_lock_irqsave(&sb->map[index].swap_lock, flags); - - if (!sb->map[index].cleared) - goto out_unlock; + if (!READ_ONCE(map->cleared)) + return false; /* * First get a stable cleared mask, setting the old mask to 0. */ - mask = xchg(&sb->map[index].cleared, 0); + mask = xchg(&map->cleared, 0); /* * Now clear the masked bits in our free word */ - do { - val = sb->map[index].word; - } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); - - ret = true; -out_unlock: - spin_unlock_irqrestore(&sb->map[index].swap_lock, flags); - return ret; + atomic_long_andnot(mask, (atomic_long_t *)&map->word); + BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word)); + return true; } int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, @@ -80,7 +71,6 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, for (i = 0; i < sb->map_nr; i++) { sb->map[i].depth = min(depth, bits_per_word); depth -= sb->map[i].depth; - spin_lock_init(&sb->map[i].swap_lock); } return 0; } @@ -92,7 +82,7 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth) unsigned int i; for (i = 0; i < sb->map_nr; i++) - sbitmap_deferred_clear(sb, i); + sbitmap_deferred_clear(&sb->map[i]); sb->depth = depth; sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); @@ -107,9 +97,11 @@ EXPORT_SYMBOL_GPL(sbitmap_resize); static int __sbitmap_get_word(unsigned long *word, unsigned long depth, unsigned int hint, bool wrap) { - unsigned int orig_hint = hint; int nr; + /* don't wrap if starting from 0 */ + wrap = wrap && hint; + while (1) { nr = find_next_zero_bit(word, depth, hint); if (unlikely(nr >= depth)) { @@ -118,8 +110,8 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, * offset to 0 in a failure case, so start from 0 to * exhaust the map. */ - if (orig_hint && hint && wrap) { - hint = orig_hint = 0; + if (hint && wrap) { + hint = 0; continue; } return -1; @@ -139,15 +131,15 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, unsigned int alloc_hint, bool round_robin) { + struct sbitmap_word *map = &sb->map[index]; int nr; do { - nr = __sbitmap_get_word(&sb->map[index].word, - sb->map[index].depth, alloc_hint, + nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint, !round_robin); if (nr != -1) break; - if (!sbitmap_deferred_clear(sb, index)) + if (!sbitmap_deferred_clear(map)) break; } while (1); @@ -207,7 +199,7 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, break; } - if (sbitmap_deferred_clear(sb, index)) + if (sbitmap_deferred_clear(&sb->map[index])) goto again; /* Jump to next index. */ diff --git a/mm/Kconfig b/mm/Kconfig index cf04bc3c866c..4275c25b5d8a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -720,19 +720,17 @@ config ZSMALLOC_STAT config GENERIC_EARLY_IOREMAP bool -config MAX_STACK_SIZE_MB - int "Maximum user stack size for 32-bit processes (MB)" - default 80 +config STACK_MAX_DEFAULT_SIZE_MB + int "Default maximum user stack size for 32-bit processes (MB)" + default 100 range 8 2048 depends on STACK_GROWSUP && (!64BIT || COMPAT) help This is the maximum stack size in Megabytes in the VM layout of 32-bit user processes when the stack grows upwards (currently only on parisc - arch). The stack will be located at the highest memory address minus - the given value, unless the RLIMIT_STACK hard limit is changed to a - smaller value in which case that is used. + arch) when the RLIMIT_STACK hard limit is unlimited. - A sane initial value is 80 MB. + A sane initial value is 100 MB. config DEFERRED_STRUCT_PAGE_INIT bool "Defer initialisation of struct pages to kthreads" diff --git a/mm/filemap.c b/mm/filemap.c index 8d9a183d17a9..df1e5ebae669 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2981,14 +2981,14 @@ EXPORT_SYMBOL(filemap_map_pages); vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { + struct address_space *mapping = vmf->vma->vm_file->f_mapping; struct page *page = vmf->page; - struct inode *inode = file_inode(vmf->vma->vm_file); vm_fault_t ret = VM_FAULT_LOCKED; - sb_start_pagefault(inode->i_sb); + sb_start_pagefault(mapping->host->i_sb); file_update_time(vmf->vma->vm_file); lock_page(page); - if (page->mapping != inode->i_mapping) { + if (page->mapping != mapping) { unlock_page(page); ret = VM_FAULT_NOPAGE; goto out; @@ -3001,7 +3001,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) set_page_dirty(page); wait_for_stable_page(page); out: - sb_end_pagefault(inode->i_sb); + sb_end_pagefault(mapping->host->i_sb); return ret; } @@ -3244,10 +3244,9 @@ void dio_warn_stale_pagecache(struct file *filp) { static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST); char pathname[128]; - struct inode *inode = file_inode(filp); char *path; - errseq_set(&inode->i_mapping->wb_err, -EIO); + errseq_set(&filp->f_mapping->wb_err, -EIO); if (__ratelimit(&_rs)) { path = file_path(filp, pathname, sizeof(pathname)); if (IS_ERR(path)) @@ -3274,7 +3273,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_flags & IOCB_NOWAIT) { /* If there are pages to writeback, return */ - if (filemap_range_has_page(inode->i_mapping, pos, + if (filemap_range_has_page(file->f_mapping, pos, pos + write_len - 1)) return -EAGAIN; } else { diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b0b6e6a4784e..2455b0c0e486 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -464,7 +464,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, .fl4_dport = dccp_hdr(skb)->dccph_sport, }; - security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 78ee1b5acf1f..1f73603913f5 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -203,7 +203,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req fl6.flowi6_oif = ireq->ir_iif; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = htons(ireq->ir_num); - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); @@ -279,7 +279,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) fl6.flowi6_oif = inet6_iif(rxskb); fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; - security_skb_classify_flow(rxskb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); @@ -907,7 +907,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 005faea415a4..396b492c804f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -447,7 +447,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); - security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; @@ -503,7 +503,7 @@ static struct rtable *icmp_route_lookup(struct net *net, route_lookup_dev = icmp_get_route_lookup_dev(skb_in); fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev); - security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); + security_skb_classify_flow(skb_in, flowi4_to_flowi_common(fl4)); rt = ip_route_output_key_hash(net, fl4, skb_in); if (IS_ERR(rt)) return rt; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f60869acbef0..fd8b8800a2c3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -602,7 +602,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, htons(ireq->ir_num), sk->sk_uid); - security_req_classify_flow(req, flowi4_to_flowi(fl4)); + security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; @@ -640,7 +640,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, ireq->ir_rmt_port, htons(ireq->ir_num), sk->sk_uid); - security_req_classify_flow(req, flowi4_to_flowi(fl4)); + security_req_classify_flow(req, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 879b76ae4435..89fff5f59eea 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1700,7 +1700,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, daddr, saddr, tcp_hdr(skb)->source, tcp_hdr(skb)->dest, arg->uid); - security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 248856b301c4..8b943f85fff9 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -778,7 +778,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4.fl4_icmp_type = user_icmph.type; fl4.fl4_icmp_code = user_icmph.code; - security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7d26e0f8bdae..50a73178d63a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -640,7 +640,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto done; } - security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 00dc3f943c80..33792cf55a79 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -418,7 +418,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) inet_sk_flowi_flags(sk), opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); - security_req_classify_flow(req, flowi4_to_flowi(&fl4)); + security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { reqsk_free(req); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dece195f212c..7103b0a89756 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1196,7 +1196,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) faddr, saddr, dport, inet->inet_sport, sk->sk_uid); - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a7e3d170af51..8e9c3e9ea36e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -819,7 +819,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sk->sk_uid; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc8ad7ddecda..206f66310a88 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -60,7 +60,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) fl6->flowi6_oif = np->mcast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 8956144ea65e..f3d05866692e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -573,7 +573,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); np = inet6_sk(sk); @@ -755,7 +755,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); local_bh_disable(); sk = icmpv6_xmit_lock(net); @@ -1008,7 +1008,7 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6, fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; - security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } static void __net_exit icmpv6_sk_exit(struct net *net) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e315526fa244..5a9f4d722f35 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -46,7 +46,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); fl6->flowi6_uid = sk->sk_uid; - security_req_classify_flow(req, flowi6_to_flowi(fl6)); + security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(dst)) @@ -95,7 +95,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; fl6->flowi6_uid = sk->sk_uid; - security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 570d1d76c44d..dffeaaaadcde 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -314,7 +314,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb, fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev); fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark); - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(oldskb, flowi6_to_flowi_common(&fl6)); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 6caa062f68e7..6ac88fe24a8e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -111,7 +111,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_uid = sk->sk_uid; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); ipcm6_init_sk(&ipc6, np); ipc6.sockc.mark = sk->sk_mark; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 6e4ab80a3b94..1f56d9aae589 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -915,7 +915,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_oif = np->mcast_oif; else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9b6cae1e49d9..e8cfb9e997bf 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -233,7 +233,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; fl6.flowi6_uid = sk->sk_uid; - security_req_classify_flow(req, flowi6_to_flowi(&fl6)); + security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e254569a3005..0e1509b02cb3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -278,7 +278,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -965,7 +965,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); - security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); + security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9008f5796ad4..b9f3dfdd2383 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1498,7 +1498,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index e5e5036257b0..96f975777438 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -606,7 +606,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (ipc6.tclass < 0) ipc6.tclass = np->tclass; diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index d7d34a62d3bf..b100c04a0e43 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -849,7 +849,7 @@ synproxy_send_tcp_ipv6(struct net *net, fl6.fl6_sport = nth->source; fl6.fl6_dport = nth->dest; security_skb_classify_flow((struct sk_buff *)skb, - flowi6_to_flowi(&fl6)); + flowi6_to_flowi_common(&fl6)); err = nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); if (err) { goto free_nskb; diff --git a/net/socket.c b/net/socket.c index f4536f875834..d35ea0a8de93 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2176,6 +2176,17 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, * Shutdown a socket. */ +int __sys_shutdown_sock(struct socket *sock, int how) +{ + int err; + + err = security_socket_shutdown(sock, how); + if (!err) + err = sock->ops->shutdown(sock, how); + + return err; +} + int __sys_shutdown(int fd, int how) { int err, fput_needed; @@ -2183,9 +2194,7 @@ int __sys_shutdown(int fd, int how) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock != NULL) { - err = security_socket_shutdown(sock, how); - if (!err) - err = sock->ops->shutdown(sock, how); + err = __sys_shutdown_sock(sock, how); fput_light(sock->file, fput_needed); } return err; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5073f55d59b0..4949e8faf9a0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1021,7 +1021,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, if ((x->sel.family && (x->sel.family != family || !xfrm_selector_match(&x->sel, fl, family))) || - !security_xfrm_state_pol_flow_match(x, pol, fl)) + !security_xfrm_state_pol_flow_match(x, pol, + &fl->u.__fl_common)) return; if (!*best || @@ -1036,7 +1037,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, if ((!x->sel.family || (x->sel.family == family && xfrm_selector_match(&x->sel, fl, family))) && - security_xfrm_state_pol_flow_match(x, pol, fl)) + security_xfrm_state_pol_flow_match(x, pol, + &fl->u.__fl_common)) *error = -ESRCH; } } diff --git a/scripts/gcc-plugin.sh b/scripts/gcc-plugin.sh deleted file mode 100755 index b79fd0bea838..000000000000 --- a/scripts/gcc-plugin.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -set -e - -srctree=$(dirname "$0") - -gccplugins_dir=$($* -print-file-name=plugin) - -# we need a c++ compiler that supports the designated initializer GNU extension -$HOSTCC -c -x c++ -std=gnu++98 - -fsyntax-only -I $srctree/gcc-plugins -I $gccplugins_dir/include 2>/dev/null < -#endif - -#if BUILDING_GCC_VERSION >= 4007 #include "tree-pretty-print.h" #include "gimple-pretty-print.h" -#endif - -#if BUILDING_GCC_VERSION >= 4006 -/* - * The c-family headers were moved into a subdirectory in GCC version - * 4.7, but most plugin-building users of GCC 4.6 are using the Debian - * or Ubuntu package, which has an out-of-tree patch to move this to the - * same location as found in 4.7 and later: - * https://sources.debian.net/src/gcc-4.6/4.6.3-14/debian/patches/pr45078.diff/ - */ #include "c-family/c-common.h" -#else -#include "c-common.h" -#endif - -#if BUILDING_GCC_VERSION <= 4008 -#include "tree-flow.h" -#else #include "tree-cfgcleanup.h" #include "tree-ssa-operands.h" #include "tree-into-ssa.h" -#endif - -#if BUILDING_GCC_VERSION >= 4008 #include "is-a.h" -#endif - #include "diagnostic.h" #include "tree-dump.h" #include "tree-pass.h" -#if BUILDING_GCC_VERSION >= 4009 #include "pass_manager.h" -#endif #include "predict.h" #include "ipa-utils.h" @@ -103,7 +73,6 @@ #include "stringpool.h" #endif -#if BUILDING_GCC_VERSION >= 4009 #include "attribs.h" #include "varasm.h" #include "stor-layout.h" @@ -122,18 +91,13 @@ #include "tree-eh.h" #include "stmt.h" #include "gimplify.h" -#endif - #include "gimple.h" - -#if BUILDING_GCC_VERSION >= 4009 #include "tree-ssa-operands.h" #include "tree-phinodes.h" #include "tree-cfg.h" #include "gimple-iterator.h" #include "gimple-ssa.h" #include "ssa-iterators.h" -#endif #if BUILDING_GCC_VERSION >= 5000 #include "builtins.h" @@ -143,15 +107,6 @@ void debug_dominance_info(enum cdi_direction dir); void debug_dominance_tree(enum cdi_direction dir, basic_block root); -#if BUILDING_GCC_VERSION == 4006 -void debug_gimple_stmt(gimple); -void debug_gimple_seq(gimple_seq); -void print_gimple_seq(FILE *, gimple_seq, int, int); -void print_gimple_stmt(FILE *, gimple, int, int); -void print_gimple_expr(FILE *, gimple, int, int); -void dump_gimple_stmt(pretty_printer *, gimple, int, int); -#endif - #ifndef __unused #define __unused __attribute__((__unused__)) #endif @@ -190,372 +145,12 @@ struct register_pass_info NAME##_pass_info = { \ .pos_op = POS, \ } -#if BUILDING_GCC_VERSION == 4005 -#define FOR_EACH_LOCAL_DECL(FUN, I, D) \ - for (tree vars = (FUN)->local_decls, (I) = 0; \ - vars && ((D) = TREE_VALUE(vars)); \ - vars = TREE_CHAIN(vars), (I)++) -#define DECL_CHAIN(NODE) (TREE_CHAIN(DECL_MINIMAL_CHECK(NODE))) -#define FOR_EACH_VEC_ELT(T, V, I, P) \ - for (I = 0; VEC_iterate(T, (V), (I), (P)); ++(I)) -#define TODO_rebuild_cgraph_edges 0 -#define SCOPE_FILE_SCOPE_P(EXP) (!(EXP)) - -#ifndef O_BINARY -#define O_BINARY 0 -#endif - -typedef struct varpool_node *varpool_node_ptr; - -static inline bool gimple_call_builtin_p(gimple stmt, enum built_in_function code) -{ - tree fndecl; - - if (!is_gimple_call(stmt)) - return false; - fndecl = gimple_call_fndecl(stmt); - if (!fndecl || DECL_BUILT_IN_CLASS(fndecl) != BUILT_IN_NORMAL) - return false; - return DECL_FUNCTION_CODE(fndecl) == code; -} - -static inline bool is_simple_builtin(tree decl) -{ - if (decl && DECL_BUILT_IN_CLASS(decl) != BUILT_IN_NORMAL) - return false; - - switch (DECL_FUNCTION_CODE(decl)) { - /* Builtins that expand to constants. */ - case BUILT_IN_CONSTANT_P: - case BUILT_IN_EXPECT: - case BUILT_IN_OBJECT_SIZE: - case BUILT_IN_UNREACHABLE: - /* Simple register moves or loads from stack. */ - case BUILT_IN_RETURN_ADDRESS: - case BUILT_IN_EXTRACT_RETURN_ADDR: - case BUILT_IN_FROB_RETURN_ADDR: - case BUILT_IN_RETURN: - case BUILT_IN_AGGREGATE_INCOMING_ADDRESS: - case BUILT_IN_FRAME_ADDRESS: - case BUILT_IN_VA_END: - case BUILT_IN_STACK_SAVE: - case BUILT_IN_STACK_RESTORE: - /* Exception state returns or moves registers around. */ - case BUILT_IN_EH_FILTER: - case BUILT_IN_EH_POINTER: - case BUILT_IN_EH_COPY_VALUES: - return true; - - default: - return false; - } -} - -static inline void add_local_decl(struct function *fun, tree d) -{ - gcc_assert(TREE_CODE(d) == VAR_DECL); - fun->local_decls = tree_cons(NULL_TREE, d, fun->local_decls); -} -#endif - -#if BUILDING_GCC_VERSION <= 4006 -#define ANY_RETURN_P(rtx) (GET_CODE(rtx) == RETURN) -#define C_DECL_REGISTER(EXP) DECL_LANG_FLAG_4(EXP) -#define EDGE_PRESERVE 0ULL -#define HOST_WIDE_INT_PRINT_HEX_PURE "%" HOST_WIDE_INT_PRINT "x" -#define flag_fat_lto_objects true - -#define get_random_seed(noinit) ({ \ - unsigned HOST_WIDE_INT seed; \ - sscanf(get_random_seed(noinit), "%" HOST_WIDE_INT_PRINT "x", &seed); \ - seed * seed; }) - -#define int_const_binop(code, arg1, arg2) \ - int_const_binop((code), (arg1), (arg2), 0) - -static inline bool gimple_clobber_p(gimple s __unused) -{ - return false; -} - -static inline bool gimple_asm_clobbers_memory_p(const_gimple stmt) -{ - unsigned i; - - for (i = 0; i < gimple_asm_nclobbers(stmt); i++) { - tree op = gimple_asm_clobber_op(stmt, i); - - if (!strcmp(TREE_STRING_POINTER(TREE_VALUE(op)), "memory")) - return true; - } - - return false; -} - -static inline tree builtin_decl_implicit(enum built_in_function fncode) -{ - return implicit_built_in_decls[fncode]; -} - -static inline int ipa_reverse_postorder(struct cgraph_node **order) -{ - return cgraph_postorder(order); -} - -static inline struct cgraph_node *cgraph_create_node(tree decl) -{ - return cgraph_node(decl); -} - -static inline struct cgraph_node *cgraph_get_create_node(tree decl) -{ - struct cgraph_node *node = cgraph_get_node(decl); - - return node ? node : cgraph_node(decl); -} - -static inline bool cgraph_function_with_gimple_body_p(struct cgraph_node *node) -{ - return node->analyzed && !node->thunk.thunk_p && !node->alias; -} - -static inline struct cgraph_node *cgraph_first_function_with_gimple_body(void) -{ - struct cgraph_node *node; - - for (node = cgraph_nodes; node; node = node->next) - if (cgraph_function_with_gimple_body_p(node)) - return node; - return NULL; -} - -static inline struct cgraph_node *cgraph_next_function_with_gimple_body(struct cgraph_node *node) -{ - for (node = node->next; node; node = node->next) - if (cgraph_function_with_gimple_body_p(node)) - return node; - return NULL; -} - -static inline bool cgraph_for_node_and_aliases(cgraph_node_ptr node, bool (*callback)(cgraph_node_ptr, void *), void *data, bool include_overwritable) -{ - cgraph_node_ptr alias; - - if (callback(node, data)) - return true; - - for (alias = node->same_body; alias; alias = alias->next) { - if (include_overwritable || cgraph_function_body_availability(alias) > AVAIL_OVERWRITABLE) - if (cgraph_for_node_and_aliases(alias, callback, data, include_overwritable)) - return true; - } - - return false; -} - -#define FOR_EACH_FUNCTION_WITH_GIMPLE_BODY(node) \ - for ((node) = cgraph_first_function_with_gimple_body(); (node); \ - (node) = cgraph_next_function_with_gimple_body(node)) - -static inline void varpool_add_new_variable(tree decl) -{ - varpool_finalize_decl(decl); -} -#endif - -#if BUILDING_GCC_VERSION <= 4007 -#define FOR_EACH_FUNCTION(node) \ - for (node = cgraph_nodes; node; node = node->next) -#define FOR_EACH_VARIABLE(node) \ - for (node = varpool_nodes; node; node = node->next) -#define PROP_loops 0 -#define NODE_SYMBOL(node) (node) -#define NODE_DECL(node) (node)->decl -#define INSN_LOCATION(INSN) RTL_LOCATION(INSN) -#define vNULL NULL - -static inline int bb_loop_depth(const_basic_block bb) -{ - return bb->loop_father ? loop_depth(bb->loop_father) : 0; -} - -static inline bool gimple_store_p(gimple gs) -{ - tree lhs = gimple_get_lhs(gs); - - return lhs && !is_gimple_reg(lhs); -} - -static inline void gimple_init_singleton(gimple g __unused) -{ -} -#endif - -#if BUILDING_GCC_VERSION == 4007 || BUILDING_GCC_VERSION == 4008 -static inline struct cgraph_node *cgraph_alias_target(struct cgraph_node *n) -{ - return cgraph_alias_aliased_node(n); -} -#endif - -#if BUILDING_GCC_VERSION <= 4008 -#define ENTRY_BLOCK_PTR_FOR_FN(FN) ENTRY_BLOCK_PTR_FOR_FUNCTION(FN) -#define EXIT_BLOCK_PTR_FOR_FN(FN) EXIT_BLOCK_PTR_FOR_FUNCTION(FN) -#define basic_block_info_for_fn(FN) ((FN)->cfg->x_basic_block_info) -#define n_basic_blocks_for_fn(FN) ((FN)->cfg->x_n_basic_blocks) -#define n_edges_for_fn(FN) ((FN)->cfg->x_n_edges) -#define last_basic_block_for_fn(FN) ((FN)->cfg->x_last_basic_block) -#define label_to_block_map_for_fn(FN) ((FN)->cfg->x_label_to_block_map) -#define profile_status_for_fn(FN) ((FN)->cfg->x_profile_status) -#define BASIC_BLOCK_FOR_FN(FN, N) BASIC_BLOCK_FOR_FUNCTION((FN), (N)) -#define NODE_IMPLICIT_ALIAS(node) (node)->same_body_alias -#define VAR_P(NODE) (TREE_CODE(NODE) == VAR_DECL) - -static inline bool tree_fits_shwi_p(const_tree t) -{ - if (t == NULL_TREE || TREE_CODE(t) != INTEGER_CST) - return false; - - if (TREE_INT_CST_HIGH(t) == 0 && (HOST_WIDE_INT)TREE_INT_CST_LOW(t) >= 0) - return true; - - if (TREE_INT_CST_HIGH(t) == -1 && (HOST_WIDE_INT)TREE_INT_CST_LOW(t) < 0 && !TYPE_UNSIGNED(TREE_TYPE(t))) - return true; - - return false; -} - -static inline bool tree_fits_uhwi_p(const_tree t) -{ - if (t == NULL_TREE || TREE_CODE(t) != INTEGER_CST) - return false; - - return TREE_INT_CST_HIGH(t) == 0; -} - -static inline HOST_WIDE_INT tree_to_shwi(const_tree t) -{ - gcc_assert(tree_fits_shwi_p(t)); - return TREE_INT_CST_LOW(t); -} - -static inline unsigned HOST_WIDE_INT tree_to_uhwi(const_tree t) -{ - gcc_assert(tree_fits_uhwi_p(t)); - return TREE_INT_CST_LOW(t); -} - -static inline const char *get_tree_code_name(enum tree_code code) -{ - gcc_assert(code < MAX_TREE_CODES); - return tree_code_name[code]; -} - -#define ipa_remove_stmt_references(cnode, stmt) - -typedef union gimple_statement_d gasm; -typedef union gimple_statement_d gassign; -typedef union gimple_statement_d gcall; -typedef union gimple_statement_d gcond; -typedef union gimple_statement_d gdebug; -typedef union gimple_statement_d ggoto; -typedef union gimple_statement_d gphi; -typedef union gimple_statement_d greturn; - -static inline gasm *as_a_gasm(gimple stmt) -{ - return stmt; -} - -static inline const gasm *as_a_const_gasm(const_gimple stmt) -{ - return stmt; -} - -static inline gassign *as_a_gassign(gimple stmt) -{ - return stmt; -} - -static inline const gassign *as_a_const_gassign(const_gimple stmt) -{ - return stmt; -} - -static inline gcall *as_a_gcall(gimple stmt) -{ - return stmt; -} - -static inline const gcall *as_a_const_gcall(const_gimple stmt) -{ - return stmt; -} - -static inline gcond *as_a_gcond(gimple stmt) -{ - return stmt; -} - -static inline const gcond *as_a_const_gcond(const_gimple stmt) -{ - return stmt; -} - -static inline gdebug *as_a_gdebug(gimple stmt) -{ - return stmt; -} - -static inline const gdebug *as_a_const_gdebug(const_gimple stmt) -{ - return stmt; -} - -static inline ggoto *as_a_ggoto(gimple stmt) -{ - return stmt; -} - -static inline const ggoto *as_a_const_ggoto(const_gimple stmt) -{ - return stmt; -} - -static inline gphi *as_a_gphi(gimple stmt) -{ - return stmt; -} - -static inline const gphi *as_a_const_gphi(const_gimple stmt) -{ - return stmt; -} - -static inline greturn *as_a_greturn(gimple stmt) -{ - return stmt; -} - -static inline const greturn *as_a_const_greturn(const_gimple stmt) -{ - return stmt; -} -#endif - -#if BUILDING_GCC_VERSION == 4008 -#define NODE_SYMBOL(node) (&(node)->symbol) -#define NODE_DECL(node) (node)->symbol.decl -#endif - -#if BUILDING_GCC_VERSION >= 4008 #define add_referenced_var(var) #define mark_sym_for_renaming(var) #define varpool_mark_needed_node(node) #define create_var_ann(var) #define TODO_dump_func 0 #define TODO_dump_cgraph 0 -#endif #if BUILDING_GCC_VERSION <= 4009 #define TODO_verify_il 0 @@ -676,7 +271,6 @@ static inline const greturn *as_a_const_greturn(const_gimple stmt) } #endif -#if BUILDING_GCC_VERSION >= 4009 #define TODO_ggc_collect 0 #define NODE_SYMBOL(node) (node) #define NODE_DECL(node) (node)->decl @@ -687,7 +281,6 @@ static inline opt_pass *get_pass_for_id(int id) { return g->get_passes()->get_pass_for_id(id); } -#endif #if BUILDING_GCC_VERSION >= 5000 && BUILDING_GCC_VERSION < 6000 /* gimple related */ diff --git a/scripts/gcc-plugins/gcc-generate-gimple-pass.h b/scripts/gcc-plugins/gcc-generate-gimple-pass.h index f20797e80b6d..51780828734e 100644 --- a/scripts/gcc-plugins/gcc-generate-gimple-pass.h +++ b/scripts/gcc-plugins/gcc-generate-gimple-pass.h @@ -73,18 +73,11 @@ #define TODO_FLAGS_FINISH 0 #endif -#if BUILDING_GCC_VERSION >= 4009 namespace { static const pass_data _PASS_NAME_PASS_DATA = { -#else -static struct gimple_opt_pass _PASS_NAME_PASS = { - .pass = { -#endif .type = GIMPLE_PASS, .name = _PASS_NAME_NAME, -#if BUILDING_GCC_VERSION >= 4008 .optinfo_flags = OPTGROUP_NONE, -#endif #if BUILDING_GCC_VERSION >= 5000 #elif BUILDING_GCC_VERSION == 4009 .has_gate = _HAS_GATE, @@ -102,12 +95,8 @@ static struct gimple_opt_pass _PASS_NAME_PASS = { .properties_destroyed = PROPERTIES_DESTROYED, .todo_flags_start = TODO_FLAGS_START, .todo_flags_finish = TODO_FLAGS_FINISH, -#if BUILDING_GCC_VERSION < 4009 - } -#endif }; -#if BUILDING_GCC_VERSION >= 4009 class _PASS_NAME_PASS : public gimple_opt_pass { public: _PASS_NAME_PASS() : gimple_opt_pass(_PASS_NAME_PASS_DATA, g) {} @@ -128,7 +117,6 @@ class _PASS_NAME_PASS : public gimple_opt_pass { #else virtual unsigned int execute(void) { return _EXECUTE(); } #endif -#endif }; } diff --git a/scripts/gcc-plugins/gcc-generate-ipa-pass.h b/scripts/gcc-plugins/gcc-generate-ipa-pass.h index 92bb4f3a87a4..c34ffec035bf 100644 --- a/scripts/gcc-plugins/gcc-generate-ipa-pass.h +++ b/scripts/gcc-plugins/gcc-generate-ipa-pass.h @@ -141,18 +141,11 @@ #define FUNCTION_TRANSFORM_TODO_FLAGS_START 0 #endif -#if BUILDING_GCC_VERSION >= 4009 namespace { static const pass_data _PASS_NAME_PASS_DATA = { -#else -static struct ipa_opt_pass_d _PASS_NAME_PASS = { - .pass = { -#endif .type = IPA_PASS, .name = _PASS_NAME_NAME, -#if BUILDING_GCC_VERSION >= 4008 .optinfo_flags = OPTGROUP_NONE, -#endif #if BUILDING_GCC_VERSION >= 5000 #elif BUILDING_GCC_VERSION == 4009 .has_gate = _HAS_GATE, @@ -170,23 +163,8 @@ static struct ipa_opt_pass_d _PASS_NAME_PASS = { .properties_destroyed = PROPERTIES_DESTROYED, .todo_flags_start = TODO_FLAGS_START, .todo_flags_finish = TODO_FLAGS_FINISH, -#if BUILDING_GCC_VERSION < 4009 - }, - .generate_summary = _GENERATE_SUMMARY, - .write_summary = _WRITE_SUMMARY, - .read_summary = _READ_SUMMARY, -#if BUILDING_GCC_VERSION >= 4006 - .write_optimization_summary = _WRITE_OPTIMIZATION_SUMMARY, - .read_optimization_summary = _READ_OPTIMIZATION_SUMMARY, -#endif - .stmt_fixup = _STMT_FIXUP, - .function_transform_todo_flags_start = FUNCTION_TRANSFORM_TODO_FLAGS_START, - .function_transform = _FUNCTION_TRANSFORM, - .variable_transform = _VARIABLE_TRANSFORM, -#endif }; -#if BUILDING_GCC_VERSION >= 4009 class _PASS_NAME_PASS : public ipa_opt_pass_d { public: _PASS_NAME_PASS() : ipa_opt_pass_d(_PASS_NAME_PASS_DATA, @@ -206,7 +184,6 @@ class _PASS_NAME_PASS : public ipa_opt_pass_d { virtual bool gate(function *) { return _GATE(); } #else virtual bool gate(void) { return _GATE(); } -#endif #endif virtual opt_pass *clone() { return new _PASS_NAME_PASS(); } diff --git a/scripts/gcc-plugins/gcc-generate-rtl-pass.h b/scripts/gcc-plugins/gcc-generate-rtl-pass.h index d69cd80b6c10..d14614f4b139 100644 --- a/scripts/gcc-plugins/gcc-generate-rtl-pass.h +++ b/scripts/gcc-plugins/gcc-generate-rtl-pass.h @@ -73,18 +73,11 @@ #define TODO_FLAGS_FINISH 0 #endif -#if BUILDING_GCC_VERSION >= 4009 namespace { static const pass_data _PASS_NAME_PASS_DATA = { -#else -static struct rtl_opt_pass _PASS_NAME_PASS = { - .pass = { -#endif .type = RTL_PASS, .name = _PASS_NAME_NAME, -#if BUILDING_GCC_VERSION >= 4008 .optinfo_flags = OPTGROUP_NONE, -#endif #if BUILDING_GCC_VERSION >= 5000 #elif BUILDING_GCC_VERSION == 4009 .has_gate = _HAS_GATE, @@ -102,12 +95,8 @@ static struct rtl_opt_pass _PASS_NAME_PASS = { .properties_destroyed = PROPERTIES_DESTROYED, .todo_flags_start = TODO_FLAGS_START, .todo_flags_finish = TODO_FLAGS_FINISH, -#if BUILDING_GCC_VERSION < 4009 - } -#endif }; -#if BUILDING_GCC_VERSION >= 4009 class _PASS_NAME_PASS : public rtl_opt_pass { public: _PASS_NAME_PASS() : rtl_opt_pass(_PASS_NAME_PASS_DATA, g) {} @@ -136,12 +125,6 @@ opt_pass *_MAKE_PASS_NAME_PASS(void) { return new _PASS_NAME_PASS(); } -#else -struct opt_pass *_MAKE_PASS_NAME_PASS(void) -{ - return &_PASS_NAME_PASS.pass; -} -#endif /* clean up user provided defines */ #undef PASS_NAME diff --git a/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h b/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h index 06800bc477e0..ef6f4c2cb6fa 100644 --- a/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h +++ b/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h @@ -73,18 +73,11 @@ #define TODO_FLAGS_FINISH 0 #endif -#if BUILDING_GCC_VERSION >= 4009 namespace { static const pass_data _PASS_NAME_PASS_DATA = { -#else -static struct simple_ipa_opt_pass _PASS_NAME_PASS = { - .pass = { -#endif .type = SIMPLE_IPA_PASS, .name = _PASS_NAME_NAME, -#if BUILDING_GCC_VERSION >= 4008 .optinfo_flags = OPTGROUP_NONE, -#endif #if BUILDING_GCC_VERSION >= 5000 #elif BUILDING_GCC_VERSION == 4009 .has_gate = _HAS_GATE, @@ -102,12 +95,8 @@ static struct simple_ipa_opt_pass _PASS_NAME_PASS = { .properties_destroyed = PROPERTIES_DESTROYED, .todo_flags_start = TODO_FLAGS_START, .todo_flags_finish = TODO_FLAGS_FINISH, -#if BUILDING_GCC_VERSION < 4009 - } -#endif }; -#if BUILDING_GCC_VERSION >= 4009 class _PASS_NAME_PASS : public simple_ipa_opt_pass { public: _PASS_NAME_PASS() : simple_ipa_opt_pass(_PASS_NAME_PASS_DATA, g) {} @@ -136,12 +125,6 @@ opt_pass *_MAKE_PASS_NAME_PASS(void) { return new _PASS_NAME_PASS(); } -#else -struct opt_pass *_MAKE_PASS_NAME_PASS(void) -{ - return &_PASS_NAME_PASS.pass; -} -#endif /* clean up user provided defines */ #undef PASS_NAME diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index cbe1d6c4b1a5..9dced66d158e 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -125,11 +125,7 @@ static tree handle_latent_entropy_attribute(tree *node, tree name, bool *no_add_attrs) { tree type; -#if BUILDING_GCC_VERSION <= 4007 - VEC(constructor_elt, gc) *vals; -#else vec *vals; -#endif switch (TREE_CODE(*node)) { default: @@ -181,11 +177,7 @@ static tree handle_latent_entropy_attribute(tree *node, tree name, if (fld) break; -#if BUILDING_GCC_VERSION <= 4007 - vals = VEC_alloc(constructor_elt, gc, nelt); -#else vec_alloc(vals, nelt); -#endif for (fld = lst; fld; fld = TREE_CHAIN(fld)) { tree random_const, fld_t = TREE_TYPE(fld); @@ -225,11 +217,7 @@ static tree handle_latent_entropy_attribute(tree *node, tree name, elt_size_int = TREE_INT_CST_LOW(elt_size); nelt = array_size_int / elt_size_int; -#if BUILDING_GCC_VERSION <= 4007 - vals = VEC_alloc(constructor_elt, gc, nelt); -#else vec_alloc(vals, nelt); -#endif for (i = 0; i < nelt; i++) { tree cst = size_int(i); diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index bd29e4e7a524..334741a31d0a 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -590,16 +590,12 @@ static void register_attributes(void *event_data, void *data) randomize_layout_attr.name = "randomize_layout"; randomize_layout_attr.type_required = true; randomize_layout_attr.handler = handle_randomize_layout_attr; -#if BUILDING_GCC_VERSION >= 4007 randomize_layout_attr.affects_type_identity = true; -#endif no_randomize_layout_attr.name = "no_randomize_layout"; no_randomize_layout_attr.type_required = true; no_randomize_layout_attr.handler = handle_randomize_layout_attr; -#if BUILDING_GCC_VERSION >= 4007 no_randomize_layout_attr.affects_type_identity = true; -#endif randomize_considered_attr.name = "randomize_considered"; randomize_considered_attr.type_required = true; diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c index caff4a6c7e9a..23bd023a283b 100644 --- a/scripts/gcc-plugins/sancov_plugin.c +++ b/scripts/gcc-plugins/sancov_plugin.c @@ -80,10 +80,8 @@ static void sancov_start_unit(void __unused *gcc_data, void __unused *user_data) nothrow_attr = tree_cons(get_identifier("nothrow"), NULL, NULL); decl_attributes(&sancov_fndecl, nothrow_attr, 0); gcc_assert(TREE_NOTHROW(sancov_fndecl)); -#if BUILDING_GCC_VERSION > 4005 leaf_attr = tree_cons(get_identifier("leaf"), NULL, NULL); decl_attributes(&sancov_fndecl, leaf_attr, 0); -#endif } __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) @@ -106,11 +104,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc }; /* BBs can be split afterwards?? */ -#if BUILDING_GCC_VERSION >= 4009 PASS_INFO(sancov, "asan", 0, PASS_POS_INSERT_BEFORE); -#else - PASS_INFO(sancov, "nrv", 1, PASS_POS_INSERT_BEFORE); -#endif if (!plugin_default_version_check(version, &gcc_version)) { error(G_("incompatible gcc/plugin versions")); diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c index 48e141e07956..e9db7dcb3e5f 100644 --- a/scripts/gcc-plugins/stackleak_plugin.c +++ b/scripts/gcc-plugins/stackleak_plugin.c @@ -80,10 +80,8 @@ static bool is_alloca(gimple stmt) if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA)) return true; -#if BUILDING_GCC_VERSION >= 4007 if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) return true; -#endif return false; } @@ -322,7 +320,7 @@ static void remove_stack_tracking_gcall(void) /* Delete the stackleak_track_stack() call */ delete_insn_and_edges(insn); -#if BUILDING_GCC_VERSION >= 4007 && BUILDING_GCC_VERSION < 8000 +#if BUILDING_GCC_VERSION < 8000 if (GET_CODE(next) == NOTE && NOTE_KIND(next) == NOTE_INSN_CALL_ARG_LOCATION) { insn = next; diff --git a/scripts/gcc-plugins/structleak_plugin.c b/scripts/gcc-plugins/structleak_plugin.c index b9ef2e162107..29b480c33a8d 100644 --- a/scripts/gcc-plugins/structleak_plugin.c +++ b/scripts/gcc-plugins/structleak_plugin.c @@ -68,9 +68,7 @@ static void register_attributes(void *event_data, void *data) { user_attr.name = "user"; user_attr.handler = handle_user_attribute; -#if BUILDING_GCC_VERSION >= 4007 user_attr.affects_type_identity = true; -#endif register_attribute(&user_attr); } @@ -137,11 +135,9 @@ static void initialize(tree var) if (!gimple_assign_single_p(stmt)) continue; rhs1 = gimple_assign_rhs1(stmt); -#if BUILDING_GCC_VERSION >= 4007 /* ... of a non-clobbering expression... */ if (TREE_CLOBBER_P(rhs1)) continue; -#endif /* ... to our variable... */ if (gimple_get_lhs(stmt) != var) continue; diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 6ebefec616e4..8e8b1e3cb847 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -156,6 +156,7 @@ int template_desc_init_fields(const char *template_fmt, const struct ima_template_field ***fields, int *num_fields); struct ima_template_desc *ima_template_desc_current(void); +struct ima_template_desc *ima_template_desc_buf(void); struct ima_template_desc *lookup_template_desc(const char *name); bool ima_template_has_modsig(const struct ima_template_desc *ima_template); int ima_restore_measurement_entry(struct ima_template_entry *entry); diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index 3dd8c2e4314e..8361941ee0a1 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -5,6 +5,7 @@ * Author: * Mimi Zohar */ +#include #include #include #include @@ -16,12 +17,19 @@ #include "ima.h" -static int __init default_appraise_setup(char *str) -{ #ifdef CONFIG_IMA_APPRAISE_BOOTPARAM +static char *ima_appraise_cmdline_default __initdata; +core_param(ima_appraise, ima_appraise_cmdline_default, charp, 0); + +void __init ima_appraise_parse_cmdline(void) +{ + const char *str = ima_appraise_cmdline_default; bool sb_state = arch_ima_get_secureboot(); int appraisal_state = ima_appraise; + if (!str) + return; + if (strncmp(str, "off", 3) == 0) appraisal_state = 0; else if (strncmp(str, "log", 3) == 0) @@ -42,11 +50,8 @@ static int __init default_appraise_setup(char *str) } else { ima_appraise = appraisal_state; } -#endif - return 1; } - -__setup("ima_appraise=", default_appraise_setup); +#endif /* * is_ima_appraise_enabled - return appraise status diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 21989fa0c107..f6a7e9643b54 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -537,7 +537,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) loff_t i_size; int rc; struct file *f = file; - bool new_file_instance = false, modified_mode = false; + bool new_file_instance = false; /* * For consistency, fail file's opened with the O_DIRECT flag on @@ -555,18 +555,10 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL); flags |= O_RDONLY; f = dentry_open(&file->f_path, flags, file->f_cred); - if (IS_ERR(f)) { - /* - * Cannot open the file again, lets modify f_mode - * of original and continue - */ - pr_info_ratelimited("Unable to reopen file for reading.\n"); - f = file; - f->f_mode |= FMODE_READ; - modified_mode = true; - } else { - new_file_instance = true; - } + if (IS_ERR(f)) + return PTR_ERR(f); + + new_file_instance = true; } i_size = i_size_read(file_inode(f)); @@ -581,8 +573,6 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) out: if (new_file_instance) fput(f); - else if (modified_mode) - f->f_mode &= ~FMODE_READ; return rc; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index cb2deaa188e7..f87cb29329e9 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -413,7 +413,7 @@ int ima_file_mmap(struct file *file, unsigned long prot) */ int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot) { - struct ima_template_desc *template; + struct ima_template_desc *template = NULL; struct file *file = vma->vm_file; char filename[NAME_MAX]; char *pathbuf = NULL; @@ -832,7 +832,7 @@ void process_buffer_measurement(struct inode *inode, const void *buf, int size, .filename = eventname, .buf = buf, .buf_len = size}; - struct ima_template_desc *template = NULL; + struct ima_template_desc *template; struct { struct ima_digest_data hdr; char digest[IMA_MAX_DIGEST_SIZE]; @@ -844,6 +844,13 @@ void process_buffer_measurement(struct inode *inode, const void *buf, int size, if (!ima_policy_flag) return; + template = ima_template_desc_buf(); + if (!template) { + ret = -EINVAL; + audit_cause = "ima_template_desc_buf"; + goto out; + } + /* * Both LSM hooks and auxilary based buffer measurements are * based on policy. To avoid code duplication, differentiate @@ -862,19 +869,6 @@ void process_buffer_measurement(struct inode *inode, const void *buf, int size, if (!pcr) pcr = CONFIG_IMA_MEASURE_PCR_IDX; - if (!template) { - template = lookup_template_desc("ima-buf"); - ret = template_desc_init_fields(template->fmt, - &(template->fields), - &(template->num_fields)); - if (ret < 0) { - pr_err("template %s init failed, result: %d\n", - (strlen(template->name) ? - template->name : template->fmt), ret); - return; - } - } - iint.ima_hash = &hash.hdr; iint.ima_hash->algo = ima_hash_algo; iint.ima_hash->length = hash_digest_size[ima_hash_algo]; @@ -934,6 +928,7 @@ static int __init init_ima(void) { int error; + ima_appraise_parse_cmdline(); ima_init_template_list(); hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 9b5adeaa47fc..823a0c1379cb 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -628,7 +628,7 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid, struct ima_rule_entry *entry; int action = 0, actmask = flags | (flags << 1); - if (template_desc) + if (template_desc && !*template_desc) *template_desc = ima_template_desc_current(); rcu_read_lock(); diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index 1e89e2d3851f..e22e510ae92d 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -55,6 +55,7 @@ static const struct ima_template_field supported_fields[] = { #define MAX_TEMPLATE_NAME_LEN sizeof("d-ng|n-ng|sig|buf|d-modisg|modsig") static struct ima_template_desc *ima_template; +static struct ima_template_desc *ima_buf_template; /** * ima_template_has_modsig - Check whether template has modsig-related fields. @@ -252,11 +253,36 @@ struct ima_template_desc *ima_template_desc_current(void) return ima_template; } +struct ima_template_desc *ima_template_desc_buf(void) +{ + if (!ima_buf_template) { + ima_init_template_list(); + ima_buf_template = lookup_template_desc("ima-buf"); + } + return ima_buf_template; +} + int __init ima_init_template(void) { struct ima_template_desc *template = ima_template_desc_current(); int result; + result = template_desc_init_fields(template->fmt, + &(template->fields), + &(template->num_fields)); + if (result < 0) { + pr_err("template %s init failed, result: %d\n", + (strlen(template->name) ? + template->name : template->fmt), result); + return result; + } + + template = ima_template_desc_buf(); + if (!template) { + pr_err("Failed to get ima-buf template\n"); + return -EINVAL; + } + result = template_desc_init_fields(template->fmt, &(template->fields), &(template->num_fields)); diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 2498451a697d..70fd9576b150 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -241,7 +241,6 @@ static void dump_common_audit_data(struct audit_buffer *ab, audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); } - audit_getcwd(); break; } case LSM_AUDIT_DATA_FILE: { @@ -255,7 +254,6 @@ static void dump_common_audit_data(struct audit_buffer *ab, audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); } - audit_getcwd(); break; } case LSM_AUDIT_DATA_IOCTL_OP: { @@ -271,7 +269,6 @@ static void dump_common_audit_data(struct audit_buffer *ab, } audit_log_format(ab, " ioctlcmd=0x%hx", a->u.op->cmd); - audit_getcwd(); break; } case LSM_AUDIT_DATA_DENTRY: { @@ -286,7 +283,6 @@ static void dump_common_audit_data(struct audit_buffer *ab, audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); } - audit_getcwd(); break; } case LSM_AUDIT_DATA_INODE: { @@ -304,7 +300,6 @@ static void dump_common_audit_data(struct audit_buffer *ab, audit_log_format(ab, " dev="); audit_log_untrustedstring(ab, inode->i_sb->s_id); audit_log_format(ab, " ino=%lu", inode->i_ino); - audit_getcwd(); break; } case LSM_AUDIT_DATA_TASK: { diff --git a/security/security.c b/security/security.c index fe33c0af264f..7b09cfbae94f 100644 --- a/security/security.c +++ b/security/security.c @@ -2208,15 +2208,16 @@ void security_sk_clone(const struct sock *sk, struct sock *newsk) } EXPORT_SYMBOL(security_sk_clone); -void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +void security_sk_classify_flow(struct sock *sk, struct flowi_common *flic) { - call_void_hook(sk_getsecid, sk, &fl->flowi_secid); + call_void_hook(sk_getsecid, sk, &flic->flowic_secid); } EXPORT_SYMBOL(security_sk_classify_flow); -void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +void security_req_classify_flow(const struct request_sock *req, + struct flowi_common *flic) { - call_void_hook(req_classify_flow, req, fl); + call_void_hook(req_classify_flow, req, flic); } EXPORT_SYMBOL(security_req_classify_flow); @@ -2408,7 +2409,7 @@ int security_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) int security_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl) + const struct flowi_common *flic) { struct security_hook_list *hp; int rc = LSM_RET_DEFAULT(xfrm_state_pol_flow_match); @@ -2424,7 +2425,7 @@ int security_xfrm_state_pol_flow_match(struct xfrm_state *x, */ hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match, list) { - rc = hp->hook.xfrm_state_pol_flow_match(x, xp, fl); + rc = hp->hook.xfrm_state_pol_flow_match(x, xp, flic); break; } return rc; @@ -2435,9 +2436,9 @@ int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) return call_int_hook(xfrm_decode_session, 0, skb, secid, 1); } -void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +void security_skb_classify_flow(struct sk_buff *skb, struct flowi_common *flic) { - int rc = call_int_hook(xfrm_decode_session, 0, skb, &fl->flowi_secid, + int rc = call_int_hook(xfrm_decode_session, 0, skb, &flic->flowic_secid, 0); BUG_ON(rc); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1e6b43bf43c6..965c24582219 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -601,7 +601,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, { const struct cred *cred = current_cred(); struct superblock_security_struct *sbsec = sb->s_security; - struct dentry *root = sbsec->sb->s_root; + struct dentry *root = sb->s_root; struct selinux_mnt_opts *opts = mnt_opts; struct inode_security_struct *root_isec; u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; @@ -1081,7 +1081,7 @@ static int selinux_sb_show_options(struct seq_file *m, struct super_block *sb) return rc; } if (sbsec->flags & ROOTCONTEXT_MNT) { - struct dentry *root = sbsec->sb->s_root; + struct dentry *root = sb->s_root; struct inode_security_struct *isec = backing_inode_security(root); seq_putc(m, ','); seq_puts(m, ROOTCONTEXT_STR); @@ -1454,7 +1454,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent * inode_doinit with a dentry, before these inodes could * be used again by userspace. */ - goto out; + goto out_invalid; } rc = inode_doinit_use_xattr(inode, dentry, sbsec->def_sid, @@ -1511,7 +1511,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent * could be used again by userspace. */ if (!dentry) - goto out; + goto out_invalid; rc = selinux_genfs_get_sid(dentry, sclass, sbsec->flags, &sid); if (rc) { @@ -1536,11 +1536,10 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent out: spin_lock(&isec->lock); if (isec->initialized == LABEL_PENDING) { - if (!sid || rc) { + if (rc) { isec->initialized = LABEL_INVALID; goto out_unlock; } - isec->initialized = LABEL_INITIALIZED; isec->sid = sid; } @@ -1548,6 +1547,15 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent out_unlock: spin_unlock(&isec->lock); return rc; + +out_invalid: + spin_lock(&isec->lock); + if (isec->initialized == LABEL_PENDING) { + isec->initialized = LABEL_INVALID; + isec->sid = sid; + } + spin_unlock(&isec->lock); + return 0; } /* Convert a Linux signal to an access vector. */ @@ -2563,7 +2571,6 @@ static int selinux_sb_alloc_security(struct super_block *sb) mutex_init(&sbsec->lock); INIT_LIST_HEAD(&sbsec->isec_head); spin_lock_init(&sbsec->isec_lock); - sbsec->sb = sb; sbsec->sid = SECINITSID_UNLABELED; sbsec->def_sid = SECINITSID_FILE; sbsec->mntpoint_sid = SECINITSID_UNLABELED; @@ -4032,6 +4039,7 @@ static int selinux_kernel_load_data(enum kernel_load_data_id id, bool contents) switch (id) { case LOADING_MODULE: rc = selinux_kernel_module_from_file(NULL); + break; default: break; } @@ -5432,9 +5440,9 @@ static void selinux_secmark_refcount_dec(void) } static void selinux_req_classify_flow(const struct request_sock *req, - struct flowi *fl) + struct flowi_common *flic) { - fl->flowi_secid = req->secid; + flic->flowic_secid = req->secid; } static int selinux_tun_dev_alloc_security(void **security) diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 330b7b6d44e0..ca4d7ab6a835 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -61,7 +61,6 @@ struct file_security_struct { }; struct superblock_security_struct { - struct super_block *sb; /* back pointer to sb object */ u32 sid; /* SID of file system superblock */ u32 def_sid; /* default SID for labeling */ u32 mntpoint_sid; /* SECURITY_FS_USE_MNTPOINT context for files */ diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index a0b465316292..0a6f34a7a971 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -26,7 +26,7 @@ int selinux_xfrm_state_delete(struct xfrm_state *x); int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl); + const struct flowi_common *flic); #ifdef CONFIG_SECURITY_NETWORK_XFRM extern atomic_t selinux_xfrm_refcount; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 17979bbd0b03..6298ae068bdf 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -596,9 +596,7 @@ void services_compute_xperms_drivers( node->datum.u.xperms->driver); } - /* If no ioctl commands are allowed, ignore auditallow and auditdeny */ - if (node->key.specified & AVTAB_XPERMS_ALLOWED) - xperms->len = 1; + xperms->len = 1; } /* diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 7314196185d1..c367d36965d4 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -175,9 +175,10 @@ int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) */ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - const struct flowi *fl) + const struct flowi_common *flic) { u32 state_sid; + u32 flic_sid; if (!xp->security) if (x->security) @@ -196,17 +197,17 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, return 0; state_sid = x->security->ctx_sid; + flic_sid = flic->flowic_secid; - if (fl->flowi_secid != state_sid) + if (flic_sid != state_sid) return 0; /* We don't need a separate SA Vs. policy polmatch check since the SA * is now of the same label as the flow and a flow Vs. policy polmatch * check had already happened in selinux_xfrm_policy_lookup() above. */ - return (avc_has_perm(&selinux_state, - fl->flowi_secid, state_sid, - SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, - NULL) ? 0 : 1); + return (avc_has_perm(&selinux_state, flic_sid, state_sid, + SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, + NULL) ? 0 : 1); } static u32 selinux_xfrm_skb_sid_egress(struct sk_buff *skb) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 3b4e557be597..d6287ce1a64f 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3871,7 +3871,6 @@ static struct smack_known *smack_from_netlbl(const struct sock *sk, u16 family, struct netlbl_lsm_secattr secattr; struct socket_smack *ssp = NULL; struct smack_known *skp = NULL; - int rc; netlbl_secattr_init(&secattr); @@ -3881,7 +3880,7 @@ static struct smack_known *smack_from_netlbl(const struct sock *sk, u16 family, if (netlbl_skbuff_getattr(skb, family, &secattr) == 0) { skp = smack_from_secattr(&secattr, ssp); if (secattr.flags & NETLBL_SECATTR_CACHEABLE) - rc = netlbl_cache_add(skb, family, &skp->smk_netlabel); + netlbl_cache_add(skb, family, &skp->smk_netlabel); } netlbl_secattr_destroy(&secattr); diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index e567b4baf3a0..5d44b7d258ef 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -1942,7 +1942,7 @@ static void smk_list_swap_rcu(struct list_head *public, * smk_parse_label_list - parse list of Smack labels, separated by spaces * * @data: the string to parse - * @private: destination list + * @list: destination list * * Returns zero on success or error code, as appropriate */ @@ -1973,7 +1973,7 @@ static int smk_parse_label_list(char *data, struct list_head *list) /** * smk_destroy_label_list - destroy a list of smack_known_list_elem - * @head: header pointer of the list to destroy + * @list: header pointer of the list to destroy */ void smk_destroy_label_list(struct list_head *list) { @@ -2131,7 +2131,7 @@ static const struct file_operations smk_unconfined_ops = { * smk_read_logging - read() for /smack/logging * @filp: file pointer, not actually used * @buf: where to put the result - * @cn: maximum to send along + * @count: maximum to send along * @ppos: where to start * * Returns number of bytes read or error code, as appropriate @@ -2272,6 +2272,7 @@ static const struct file_operations smk_load_self_ops = { * @buf: data from user space * @count: bytes sent * @ppos: where to start - must be 0 + * @format: /smack/load or /smack/load2 or /smack/change-rule format. */ static ssize_t smk_user_access(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int format) diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config index 64c19d8eba79..ad431a5178fb 100644 --- a/tools/testing/selftests/seccomp/config +++ b/tools/testing/selftests/seccomp/config @@ -1,3 +1,4 @@ +CONFIG_PID_NS=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y CONFIG_USER_NS=y diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 91f5a89cadac..fcc806585266 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -4,12 +4,16 @@ */ #define _GNU_SOURCE #include +#include +#include +#include #include #include #include #include #include #include +#include #include #include #include @@ -70,18 +74,74 @@ unsigned long long calibrate(void) return samples * seconds; } +bool approx(int i_one, int i_two) +{ + double one = i_one, one_bump = one * 0.01; + double two = i_two, two_bump = two * 0.01; + + one_bump = one + MAX(one_bump, 2.0); + two_bump = two + MAX(two_bump, 2.0); + + /* Equal to, or within 1% or 2 digits */ + if (one == two || + (one > two && one <= two_bump) || + (two > one && two <= one_bump)) + return true; + return false; +} + +bool le(int i_one, int i_two) +{ + if (i_one <= i_two) + return true; + return false; +} + +long compare(const char *name_one, const char *name_eval, const char *name_two, + unsigned long long one, bool (*eval)(int, int), unsigned long long two) +{ + bool good; + + printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, + (long long)one, name_eval, (long long)two); + if (one > INT_MAX) { + printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); + return 1; + } + if (two > INT_MAX) { + printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); + return 1; + } + + good = eval(one, two); + printf("%s\n", good ? "✔️" : "❌"); + + return good ? 0 : 1; +} + int main(int argc, char *argv[]) { + struct sock_filter bitmap_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog bitmap_prog = { + .len = (unsigned short)ARRAY_SIZE(bitmap_filter), + .filter = bitmap_filter, + }; struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, args[0])), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { .len = (unsigned short)ARRAY_SIZE(filter), .filter = filter, }; - long ret; - unsigned long long samples; - unsigned long long native, filter1, filter2; + + long ret, bits; + unsigned long long samples, calc; + unsigned long long native, filter1, filter2, bitmap1, bitmap2; + unsigned long long entry, per_filter1, per_filter2; printf("Current BPF sysctl settings:\n"); system("sysctl net.core.bpf_jit_enable"); @@ -101,35 +161,82 @@ int main(int argc, char *argv[]) ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); - /* One filter */ + /* One filter resulting in a bitmap */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); + assert(ret == 0); + + bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); + + /* Second filter resulting in a bitmap */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); + assert(ret == 0); + + bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); + + /* Third filter, can no longer be converted to bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1); + printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); - if (filter1 == native) - printf("No overhead measured!? Try running again with more samples.\n"); - - /* Two filters */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + /* Fourth filter, can not be converted to bitmap because of filter 3 */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2); + printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); - /* Calculations */ - printf("Estimated total seccomp overhead for 1 filter: %llu ns\n", - filter1 - native); + /* Estimations */ +#define ESTIMATE(fmt, var, what) do { \ + var = (what); \ + printf("Estimated " fmt ": %llu ns\n", var); \ + if (var > INT_MAX) \ + goto more_samples; \ + } while (0) - printf("Estimated total seccomp overhead for 2 filters: %llu ns\n", - filter2 - native); + ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, + bitmap1 - native); + ESTIMATE("total seccomp overhead for 2 bitmapped filters", calc, + bitmap2 - native); + ESTIMATE("total seccomp overhead for 3 full filters", calc, + filter1 - native); + ESTIMATE("total seccomp overhead for 4 full filters", calc, + filter2 - native); + ESTIMATE("seccomp entry overhead", entry, + bitmap1 - native - (bitmap2 - bitmap1)); + ESTIMATE("seccomp per-filter overhead (last 2 diff)", per_filter1, + filter2 - filter1); + ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, + (filter2 - native - entry) / 4); - printf("Estimated seccomp per-filter overhead: %llu ns\n", - filter2 - filter1); + printf("Expectations:\n"); + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); + bits = compare("native", "≤", "1 filter", native, le, filter1); + if (bits) + goto more_samples; - printf("Estimated seccomp entry overhead: %llu ns\n", - filter1 - native - (filter2 - filter1)); + ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", + per_filter1, approx, per_filter2); + bits = compare("1 bitmapped", "≈", "2 bitmapped", + bitmap1 - native, approx, bitmap2 - native); + if (bits) { + printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); + goto out; + } + + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); + ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", + entry + (per_filter1 * 4) + native, approx, filter2); + if (ret == 0) + goto out; + +more_samples: + printf("Saw unexpected benchmark result. Try running again with more samples?\n"); +out: return 0; } diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings index ba4d85f74cd6..6091b45d226b 100644 --- a/tools/testing/selftests/seccomp/settings +++ b/tools/testing/selftests/seccomp/settings @@ -1 +1 @@ -timeout=90 +timeout=120