From 94798081732abfb5748471d5c3cced6ff187fa36 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 13 Feb 2026 18:52:43 -0800 Subject: [PATCH 01/67] driver core: platform: add kerneldoc to struct platform_device_info Add kernel documentation for struct platform_device_info and its individual members. While at it remove an extra indent level from the structure definition. Signed-off-by: Dmitry Torokhov Reviewed-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260214025246.2095239-2-dmitry.torokhov@gmail.com Signed-off-by: Danilo Krummrich --- include/linux/platform_device.h | 53 ++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 813da101b5bf..5f54217930e1 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -118,22 +118,53 @@ extern int platform_get_irq_byname_optional(struct platform_device *dev, const char *name); extern int platform_add_devices(struct platform_device **, int); +/** + * struct platform_device_info - set of parameters for creating a platform device + * @parent: parent device for the new platform device. + * @fwnode: firmware node associated with the device. + * @of_node_reused: indicates that device tree node associated with the device + * is shared with another device, typically its ancestor. Setting this to + * %true prevents the device from being matched via the OF match table, + * and stops the device core from automatically binding pinctrl + * configuration to avoid disrupting the other device. + * @name: name of the device. + * @id: instance ID of the device. Use %PLATFORM_DEVID_NONE if there is only + * one instance of the device, or %PLATFORM_DEVID_AUTO to let the + * kernel automatically assign a unique instance ID. + * @res: set of resources to attach to the device. + * @num_res: number of entries in @res. + * @data: device-specific data for this platform device. 
+ * @size_data: size of device-specific data. + * @dma_mask: DMA mask for the device. + * @properties: a set of software properties for the device. If provided, + * a managed software node will be automatically created and + * assigned to the device. The properties array must be terminated + * with a sentinel entry. + * + * This structure is used to hold information needed to create and register + * a platform device using platform_device_register_full(). + * + * platform_device_register_full() makes deep copies of @name, @res, @data and + * @properties, so the caller does not need to keep them after registration. + * If the registration is performed during initialization, these can be marked + * as __initconst. + */ struct platform_device_info { - struct device *parent; - struct fwnode_handle *fwnode; - bool of_node_reused; + struct device *parent; + struct fwnode_handle *fwnode; + bool of_node_reused; - const char *name; - int id; + const char *name; + int id; - const struct resource *res; - unsigned int num_res; + const struct resource *res; + unsigned int num_res; - const void *data; - size_t size_data; - u64 dma_mask; + const void *data; + size_t size_data; + u64 dma_mask; - const struct property_entry *properties; + const struct property_entry *properties; }; extern struct platform_device *platform_device_register_full( const struct platform_device_info *pdevinfo); From 0fc434bc2c45fceb9356f2138911db0f454b8ca6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 13 Feb 2026 18:52:44 -0800 Subject: [PATCH 02/67] driver core: platform: allow attaching software nodes when creating devices Extend platform_device_info structure with an optional pointer to a software node to be used as a secondary firmware node for the device being created. If software node has not been registered yet it will be automatically registered. This reduces boilerplate needed when switching legacy board code to static device properties/GPIO references. 
Signed-off-by: Dmitry Torokhov Reviewed-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260214025246.2095239-3-dmitry.torokhov@gmail.com Signed-off-by: Danilo Krummrich --- drivers/base/platform.c | 9 ++++++++- include/linux/platform_device.h | 7 ++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index b45d41b018ca..ec467ccd05b3 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -850,6 +850,9 @@ struct platform_device *platform_device_register_full( int ret; struct platform_device *pdev; + if (pdevinfo->swnode && pdevinfo->properties) + return ERR_PTR(-EINVAL); + pdev = platform_device_alloc(pdevinfo->name, pdevinfo->id); if (!pdev) return ERR_PTR(-ENOMEM); @@ -875,7 +878,11 @@ struct platform_device *platform_device_register_full( if (ret) goto err; - if (pdevinfo->properties) { + if (pdevinfo->swnode) { + ret = device_add_software_node(&pdev->dev, pdevinfo->swnode); + if (ret) + goto err; + } else if (pdevinfo->properties) { ret = device_create_managed_software_node(&pdev->dev, pdevinfo->properties, NULL); if (ret) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 5f54217930e1..754e4bf2771a 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -136,10 +136,14 @@ extern int platform_add_devices(struct platform_device **, int); * @data: device-specific data for this platform device. * @size_data: size of device-specific data. * @dma_mask: DMA mask for the device. + * @swnode: a secondary software node to be attached to the device. The node + * will be automatically registered and its lifetime tied to the platform + * device if it is not registered yet. * @properties: a set of software properties for the device. If provided, * a managed software node will be automatically created and * assigned to the device. The properties array must be terminated - * with a sentinel entry. + * with a sentinel entry. 
Specifying both @properties and @swnode is not + * allowed. * * This structure is used to hold information needed to create and register * a platform device using platform_device_register_full(). @@ -164,6 +168,7 @@ struct platform_device_info { size_t size_data; u64 dma_mask; + const struct software_node *swnode; const struct property_entry *properties; }; extern struct platform_device *platform_device_register_full( From 1b9a5bc8513d081c1bfe2c096b6dc502a4660f47 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 13 Feb 2026 18:52:45 -0800 Subject: [PATCH 03/67] driver core: platform: fix various formatting issues Make checkpatch happy. This helps when checkpatch is set up as an automatic linter. Signed-off-by: Dmitry Torokhov Reviewed-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260214025246.2095239-4-dmitry.torokhov@gmail.com Signed-off-by: Danilo Krummrich --- drivers/base/platform.c | 49 ++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index ec467ccd05b3..4617d1e88772 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -75,7 +75,7 @@ struct resource *platform_get_mem_or_io(struct platform_device *dev, for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; - if ((resource_type(r) & (IORESOURCE_MEM|IORESOURCE_IO)) && num-- == 0) + if ((resource_type(r) & (IORESOURCE_MEM | IORESOURCE_IO)) && num-- == 0) return r; } return NULL; @@ -97,7 +97,7 @@ EXPORT_SYMBOL_GPL(platform_get_mem_or_io); */ void __iomem * devm_platform_get_and_ioremap_resource(struct platform_device *pdev, - unsigned int index, struct resource **res) + unsigned int index, struct resource **res) { struct resource *r; @@ -172,7 +172,7 @@ static const struct cpumask *get_irq_affinity(struct platform_device *dev, * @num: interrupt number index * @affinity: optional cpumask pointer to get the affinity of a per-cpu interrupt * - * Gets 
an interupt for a platform device. Device drivers should check the + * Gets an interrupt for a platform device. Device drivers should check the * return value for errors so as to not pass a negative integer value to * the request_irq() APIs. Optional affinity information is provided in the * affinity pointer if available, and NULL otherwise. @@ -844,8 +844,7 @@ EXPORT_SYMBOL_GPL(platform_device_unregister); * * Returns &struct platform_device pointer on success, or ERR_PTR() on error. */ -struct platform_device *platform_device_register_full( - const struct platform_device_info *pdevinfo) +struct platform_device *platform_device_register_full(const struct platform_device_info *pdevinfo) { int ret; struct platform_device *pdev; @@ -868,13 +867,11 @@ struct platform_device *platform_device_register_full( pdev->dev.coherent_dma_mask = pdevinfo->dma_mask; } - ret = platform_device_add_resources(pdev, - pdevinfo->res, pdevinfo->num_res); + ret = platform_device_add_resources(pdev, pdevinfo->res, pdevinfo->num_res); if (ret) goto err; - ret = platform_device_add_data(pdev, - pdevinfo->data, pdevinfo->size_data); + ret = platform_device_add_data(pdev, pdevinfo->data, pdevinfo->size_data); if (ret) goto err; @@ -906,8 +903,7 @@ EXPORT_SYMBOL_GPL(platform_device_register_full); * @drv: platform driver structure * @owner: owning module/driver */ -int __platform_driver_register(struct platform_driver *drv, - struct module *owner) +int __platform_driver_register(struct platform_driver *drv, struct module *owner) { drv->driver.owner = owner; drv->driver.bus = &platform_bus_type; @@ -959,13 +955,14 @@ static int is_bound_to_driver(struct device *dev, void *driver) * a negative error code and with the driver not registered. 
*/ int __init_or_module __platform_driver_probe(struct platform_driver *drv, - int (*probe)(struct platform_device *), struct module *module) + int (*probe)(struct platform_device *), + struct module *module) { int retval; if (drv->driver.probe_type == PROBE_PREFER_ASYNCHRONOUS) { pr_err("%s: drivers registered with %s can not be probed asynchronously\n", - drv->driver.name, __func__); + drv->driver.name, __func__); return -EINVAL; } @@ -1021,11 +1018,11 @@ EXPORT_SYMBOL_GPL(__platform_driver_probe); * * Returns &struct platform_device pointer on success, or ERR_PTR() on error. */ -struct platform_device * __init_or_module __platform_create_bundle( - struct platform_driver *driver, - int (*probe)(struct platform_device *), - struct resource *res, unsigned int n_res, - const void *data, size_t size, struct module *module) +struct platform_device * __init_or_module +__platform_create_bundle(struct platform_driver *driver, + int (*probe)(struct platform_device *), + struct resource *res, unsigned int n_res, + const void *data, size_t size, struct module *module) { struct platform_device *pdev; int error; @@ -1124,9 +1121,8 @@ void platform_unregister_drivers(struct platform_driver * const *drivers, } EXPORT_SYMBOL_GPL(platform_unregister_drivers); -static const struct platform_device_id *platform_match_id( - const struct platform_device_id *id, - struct platform_device *pdev) +static const struct platform_device_id * +platform_match_id(const struct platform_device_id *id, struct platform_device *pdev) { while (id->name[0]) { if (strcmp(pdev->name, id->name) == 0) { @@ -1348,13 +1344,12 @@ static struct attribute *platform_dev_attrs[] = { NULL, }; -static umode_t platform_dev_attrs_visible(struct kobject *kobj, struct attribute *a, - int n) +static umode_t platform_dev_attrs_visible(struct kobject *kobj, + struct attribute *a, int n) { struct device *dev = container_of(kobj, typeof(*dev), kobj); - if (a == &dev_attr_numa_node.attr && - dev_to_node(dev) == NUMA_NO_NODE) 
+ if (a == &dev_attr_numa_node.attr && dev_to_node(dev) == NUMA_NO_NODE) return 0; return a->mode; @@ -1366,7 +1361,6 @@ static const struct attribute_group platform_dev_group = { }; __ATTRIBUTE_GROUPS(platform_dev); - /** * platform_match - bind platform device to platform driver. * @dev: device. @@ -1419,8 +1413,7 @@ static int platform_uevent(const struct device *dev, struct kobj_uevent_env *env if (rc != -ENODEV) return rc; - add_uevent_var(env, "MODALIAS=%s%s", PLATFORM_MODULE_PREFIX, - pdev->name); + add_uevent_var(env, "MODALIAS=%s%s", PLATFORM_MODULE_PREFIX, pdev->name); return 0; } From d1880d5f5899c572337ceb3d7e067052b22597e1 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 19 Feb 2026 09:12:29 +0000 Subject: [PATCH 04/67] rust: irq: move 'static bounds to traits The 'static bound is required by all irq handlers, so it is simpler to specify it on the trait declaration instead of repeating it every time the trait is used as a where clause. Note that we already list Sync on the trait bound for the same reason. Signed-off-by: Alice Ryhl Reviewed-by: Gary Guo Reviewed-by: Benno Lossin Link: https://patch.msgid.link/20260219-irq-static-on-trait-v1-1-6ede6b743ea3@google.com Signed-off-by: Danilo Krummrich --- rust/kernel/irq/request.rs | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/rust/kernel/irq/request.rs b/rust/kernel/irq/request.rs index 7a36f790593e..f425fe12f7c8 100644 --- a/rust/kernel/irq/request.rs +++ b/rust/kernel/irq/request.rs @@ -27,7 +27,7 @@ pub enum IrqReturn { } /// Callbacks for an IRQ handler. -pub trait Handler: Sync { +pub trait Handler: Sync + 'static { /// The hard IRQ handler. 
/// /// This is executed in interrupt context, hence all corresponding @@ -45,7 +45,7 @@ fn handle(&self, device: &Device) -> IrqReturn { } } -impl Handler for Box { +impl Handler for Box { fn handle(&self, device: &Device) -> IrqReturn { T::handle(self, device) } @@ -181,7 +181,7 @@ pub fn irq(&self) -> u32 { /// /// * We own an irq handler whose cookie is a pointer to `Self`. #[pin_data] -pub struct Registration { +pub struct Registration { #[pin] inner: Devres, @@ -194,7 +194,7 @@ pub struct Registration { _pin: PhantomPinned, } -impl Registration { +impl Registration { /// Registers the IRQ handler with the system for the given IRQ number. pub fn new<'a>( request: IrqRequest<'a>, @@ -260,10 +260,7 @@ pub fn synchronize(&self, dev: &Device) -> Result { /// # Safety /// /// This function should be only used as the callback in `request_irq`. -unsafe extern "C" fn handle_irq_callback( - _irq: i32, - ptr: *mut c_void, -) -> c_uint { +unsafe extern "C" fn handle_irq_callback(_irq: i32, ptr: *mut c_void) -> c_uint { // SAFETY: `ptr` is a pointer to `Registration` set in `Registration::new` let registration = unsafe { &*(ptr as *const Registration) }; // SAFETY: The irq callback is removed before the device is unbound, so the fact that the irq @@ -287,7 +284,7 @@ pub enum ThreadedIrqReturn { } /// Callbacks for a threaded IRQ handler. -pub trait ThreadedHandler: Sync { +pub trait ThreadedHandler: Sync + 'static { /// The hard IRQ handler. /// /// This is executed in interrupt context, hence all corresponding @@ -318,7 +315,7 @@ fn handle_threaded(&self, device: &Device) -> IrqReturn { } } -impl ThreadedHandler for Box { +impl ThreadedHandler for Box { fn handle(&self, device: &Device) -> ThreadedIrqReturn { T::handle(self, device) } @@ -401,7 +398,7 @@ fn handle_threaded(&self, device: &Device) -> IrqReturn { /// /// * We own an irq handler whose cookie is a pointer to `Self`. 
#[pin_data] -pub struct ThreadedRegistration { +pub struct ThreadedRegistration { #[pin] inner: Devres, @@ -414,7 +411,7 @@ pub struct ThreadedRegistration { _pin: PhantomPinned, } -impl ThreadedRegistration { +impl ThreadedRegistration { /// Registers the IRQ handler with the system for the given IRQ number. pub fn new<'a>( request: IrqRequest<'a>, @@ -481,7 +478,7 @@ pub fn synchronize(&self, dev: &Device) -> Result { /// # Safety /// /// This function should be only used as the callback in `request_threaded_irq`. -unsafe extern "C" fn handle_threaded_irq_callback( +unsafe extern "C" fn handle_threaded_irq_callback( _irq: i32, ptr: *mut c_void, ) -> c_uint { @@ -497,10 +494,7 @@ pub fn synchronize(&self, dev: &Device) -> Result { /// # Safety /// /// This function should be only used as the callback in `request_threaded_irq`. -unsafe extern "C" fn thread_fn_callback( - _irq: i32, - ptr: *mut c_void, -) -> c_uint { +unsafe extern "C" fn thread_fn_callback(_irq: i32, ptr: *mut c_void) -> c_uint { // SAFETY: `ptr` is a pointer to `ThreadedRegistration` set in `ThreadedRegistration::new` let registration = unsafe { &*(ptr as *const ThreadedRegistration) }; // SAFETY: The irq callback is removed before the device is unbound, so the fact that the irq From f917dc56060a10f401dd8ca46a1c5df237b35d84 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 2 Mar 2026 16:01:50 -0800 Subject: [PATCH 05/67] device core: Fix kernel-doc warnings in base.h In preparation for adding new fields to 'struct device_private' fix up existing kernel-doc warnings in this header file of the form: Warning: drivers/base/base.h:59 struct member 'subsys' not described in 'subsys_private' Warning: drivers/base/base.h:59 struct member 'devices_kset' not described in 'subsys_private' Warning: drivers/base/base.h:59 struct member 'interfaces' not described in 'subsys_private' Warning: drivers/base/base.h:59 struct member 'mutex' not described in 'subsys_private' ...which are simple replacements of " 
- " with ": ". Add new descriptions for these previously undescribed fields: Warning: drivers/base/base.h:58 struct member 'drivers_autoprobe' not described in 'subsys_private' Warning: drivers/base/base.h:117 struct member 'deferred_probe_reason' not described in 'device_private' Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260303000207.1836586-3-dan.j.williams@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/base.h | 79 +++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 1af95ac68b77..3bc8e6fd06a8 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -13,27 +13,28 @@ #include /** - * struct subsys_private - structure to hold the private to the driver core portions of the bus_type/class structure. - * - * @subsys - the struct kset that defines this subsystem - * @devices_kset - the subsystem's 'devices' directory - * @interfaces - list of subsystem interfaces associated - * @mutex - protect the devices, and interfaces lists. - * - * @drivers_kset - the list of drivers associated - * @klist_devices - the klist to iterate over the @devices_kset - * @klist_drivers - the klist to iterate over the @drivers_kset - * @bus_notifier - the bus notifier list for anything that cares about things - * on this bus. - * @bus - pointer back to the struct bus_type that this structure is associated - * with. + * struct subsys_private - structure to hold the private to the driver core + * portions of the bus_type/class structure. + * @subsys: the struct kset that defines this subsystem + * @devices_kset: the subsystem's 'devices' directory + * @interfaces: list of subsystem interfaces associated + * @mutex: protect the devices, and interfaces lists. 
+ * @drivers_kset: the list of drivers associated + * @klist_devices: the klist to iterate over the @devices_kset + * @klist_drivers: the klist to iterate over the @drivers_kset + * @bus_notifier: the bus notifier list for anything that cares about things + * on this bus. + * @drivers_autoprobe: gate whether new devices are automatically attached to + * registered drivers, or new drivers automatically attach + * to existing devices. + * @bus: pointer back to the struct bus_type that this structure is associated + * with. * @dev_root: Default device to use as the parent. - * - * @glue_dirs - "glue" directory to put in-between the parent device to - * avoid namespace conflicts - * @class - pointer back to the struct class that this structure is associated - * with. - * @lock_key: Lock class key for use by the lock validator + * @glue_dirs: "glue" directory to put in-between the parent device to + * avoid namespace conflicts + * @class: pointer back to the struct class that this structure is associated + * with. + * @lock_key: Lock class key for use by the lock validator * * This structure is the one that is the actual kobject allowing struct * bus_type/class to be statically allocated safely. Nothing outside of the @@ -98,24 +99,26 @@ struct driver_type { #endif /** - * struct device_private - structure to hold the private to the driver core portions of the device structure. - * - * @klist_children - klist containing all children of this device - * @knode_parent - node in sibling list - * @knode_driver - node in driver list - * @knode_bus - node in bus list - * @knode_class - node in class list - * @deferred_probe - entry in deferred_probe_list which is used to retry the - * binding of drivers which were unable to get all the resources needed by - * the device; typically because it depends on another driver getting - * probed first. 
- * @async_driver - pointer to device driver awaiting probe via async_probe - * @device - pointer back to the struct device that this structure is - * associated with. - * @driver_type - The type of the bound Rust driver. - * @dead - This device is currently either in the process of or has been - * removed from the system. Any asynchronous events scheduled for this - * device should exit without taking any action. + * struct device_private - structure to hold the private to the driver core + * portions of the device structure. + * @klist_children: klist containing all children of this device + * @knode_parent: node in sibling list + * @knode_driver: node in driver list + * @knode_bus: node in bus list + * @knode_class: node in class list + * @deferred_probe: entry in deferred_probe_list which is used to retry the + * binding of drivers which were unable to get all the + * resources needed by the device; typically because it depends + * on another driver getting probed first. + * @async_driver: pointer to device driver awaiting probe via async_probe + * @deferred_probe_reason: capture the -EPROBE_DEFER message emitted with + * dev_err_probe() for later retrieval via debugfs + * @device: pointer back to the struct device that this structure is + * associated with. + * @driver_type: The type of the bound Rust driver. + * @dead: This device is currently either in the process of or has been + * removed from the system. Any asynchronous events scheduled for this + * device should exit without taking any action. * * Nothing outside of the driver core should ever touch these fields. */ From 507d8ce13f5b91d5b4dca7bd4b4e4249e8021cca Mon Sep 17 00:00:00 2001 From: "T.J. 
Mercier" Date: Wed, 25 Feb 2026 14:34:02 -0800 Subject: [PATCH 06/67] kernfs: Don't set_nlink for directories being removed If a directory is already in the process of removal its i_nlink count becomes irrelevant because its contents are also about to be removed and any pending filesystem operations on it or its contents will soon start to fail. So we can avoid setting it for directories already flagged for removal. This avoids a race in the next patch, which adds clearing of the i_nlink count for kernfs nodes being removed to support inotify delete events. Use protection from the kernfs_iattr_rwsem to avoid adding more contention to the kernfs_rwsem for calls to kernfs_refresh_inode. Signed-off-by: T.J. Mercier Tested-by: syzbot@syzkaller.appspotmail.com Link: https://patch.msgid.link/20260225223404.783173-2-tjmercier@google.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 2 ++ fs/kernfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8d40c4b1db9f..d9a1707b2148 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1491,12 +1491,14 @@ static void __kernfs_remove(struct kernfs_node *kn) pr_debug("kernfs %s: removing\n", kernfs_rcu_name(kn)); /* prevent new usage by marking all nodes removing and deactivating */ + down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); pos = NULL; while ((pos = kernfs_next_descendant_post(pos, kn))) { pos->flags |= KERNFS_REMOVING; if (kernfs_active(pos)) atomic_add(KN_DEACTIVATED_BIAS, &pos->active); } + up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); /* deactivate and unlink the subtree node-by-node */ do { diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index a36aaee98dce..afdc4021e81a 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -178,7 +178,7 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) */ set_inode_attr(inode, attrs); - if (kernfs_type(kn) == KERNFS_DIR) + if (kernfs_type(kn) == KERNFS_DIR && !(kn->flags 
& KERNFS_REMOVING)) set_nlink(inode, kn->dir.subdirs + 2); } From eea5d2bb34ba11dccd9c53f392dc50cf060150a9 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 25 Feb 2026 14:34:03 -0800 Subject: [PATCH 07/67] kernfs: Send IN_DELETE_SELF and IN_IGNORED Currently some kernfs files (e.g. cgroup.events, memory.events) support inotify watches for IN_MODIFY, but unlike with regular filesystems, they do not receive IN_DELETE_SELF or IN_IGNORED events when they are removed. This means inotify watches persist after file deletion until the process exits and the inotify file descriptor is cleaned up, or until inotify_rm_watch is called manually. This creates a problem for processes monitoring cgroups. For example, a service monitoring memory.events for memory.high breaches needs to know when a cgroup is removed to clean up its state. Where it's known that a cgroup is removed when all processes die, without IN_DELETE_SELF the service must resort to inefficient workarounds such as: 1) Periodically scanning procfs to detect process death (wastes CPU and is susceptible to PID reuse). 2) Holding a pidfd for every monitored cgroup (can exhaust file descriptors). This patch enables IN_DELETE_SELF and IN_IGNORED events for kernfs files and directories by clearing inode i_nlink values during removal. This allows VFS to make the necessary fsnotify calls so that userspace receives the inotify events. As a result, applications can rely on a single existing watch on a file of interest (e.g. memory.events) to receive notifications for both modifications and the eventual removal of the file, as well as automatic watch descriptor cleanup, simplifying userspace logic and improving efficiency. There is a gap in this implementation for certain file removals due to their unique nature in kernfs. Directory removals that trigger file removals occur through vfs_rmdir, which shrinks the dcache and emits fsnotify events after the rmdir operation; there is no issue here.
However kernfs writes to particular files (e.g. cgroup.subtree_control) can also cause file removal, but vfs_write does not attempt to emit fsnotify events after the write operation, even if i_nlink counts are 0. As a usecase for monitoring this category of file removals is not known, they are left without having IN_DELETE or IN_DELETE_SELF events generated. Fanotify recursive monitoring also does not work for kernfs nodes that do not have inodes attached, as they are created on-demand in kernfs. Suggested-by: Jan Kara Signed-off-by: T.J. Mercier Tested-by: syzbot@syzkaller.appspotmail.com Acked-by: Tejun Heo Link: https://patch.msgid.link/20260225223404.783173-3-tjmercier@google.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 54 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index d9a1707b2148..715b651e35a0 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -486,7 +486,7 @@ void kernfs_put_active(struct kernfs_node *kn) * removers may invoke this function concurrently on @kn and all will * return after draining is complete. 
*/ -static void kernfs_drain(struct kernfs_node *kn) +static void kernfs_drain(struct kernfs_node *kn, bool drop_supers) __releases(&kernfs_root(kn)->kernfs_rwsem) __acquires(&kernfs_root(kn)->kernfs_rwsem) { @@ -506,6 +506,8 @@ static void kernfs_drain(struct kernfs_node *kn) return; up_write(&root->kernfs_rwsem); + if (drop_supers) + up_read(&root->kernfs_supers_rwsem); if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); @@ -524,6 +526,8 @@ static void kernfs_drain(struct kernfs_node *kn) if (kernfs_should_drain_open_files(kn)) kernfs_drain_open_files(kn); + if (drop_supers) + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); } @@ -1465,12 +1469,43 @@ void kernfs_show(struct kernfs_node *kn, bool show) kn->flags |= KERNFS_HIDDEN; if (kernfs_active(kn)) atomic_add(KN_DEACTIVATED_BIAS, &kn->active); - kernfs_drain(kn); + kernfs_drain(kn, false); } up_write(&root->kernfs_rwsem); } +/* + * This function enables VFS to send fsnotify events for deletions. + * There is a gap in this implementation for certain file removals due to their + * unique nature in kernfs. Directory removals that trigger file removals occur + * through vfs_rmdir, which shrinks the dcache and emits fsnotify events after + * the rmdir operation; there is no issue here. However kernfs writes to + * particular files (e.g. cgroup.subtree_control) can also cause file removal, + * but vfs_write does not attempt to emit fsnotify events after the write + * operation, even if i_nlink counts are 0. As a usecase for monitoring this + * category of file removals is not known, they are left without having + * IN_DELETE or IN_DELETE_SELF events generated. + * Fanotify recursive monitoring also does not work for kernfs nodes that do not + * have inodes attached, as they are created on-demand in kernfs.
+ */ +static void kernfs_clear_inode_nlink(struct kernfs_node *kn) +{ + struct kernfs_root *root = kernfs_root(kn); + struct kernfs_super_info *info; + + lockdep_assert_held_read(&root->kernfs_supers_rwsem); + + list_for_each_entry(info, &root->supers, node) { + struct inode *inode = ilookup(info->sb, kernfs_ino(kn)); + + if (inode) { + clear_nlink(inode); + iput(inode); + } + } +} + static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos, *parent; @@ -1479,6 +1514,7 @@ static void __kernfs_remove(struct kernfs_node *kn) if (!kn) return; + lockdep_assert_held_read(&kernfs_root(kn)->kernfs_supers_rwsem); lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); /* @@ -1512,7 +1548,7 @@ static void __kernfs_remove(struct kernfs_node *kn) */ kernfs_get(pos); - kernfs_drain(pos); + kernfs_drain(pos, true); parent = kernfs_parent(pos); /* * kernfs_unlink_sibling() succeeds once per node. Use it @@ -1522,9 +1558,11 @@ static void __kernfs_remove(struct kernfs_node *kn) struct kernfs_iattrs *ps_iattr = parent ? 
parent->iattr : NULL; - /* update timestamps on the parent */ down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); + kernfs_clear_inode_nlink(pos); + + /* update timestamps on the parent */ if (ps_iattr) { ktime_get_real_ts64(&ps_iattr->ia_ctime); ps_iattr->ia_mtime = ps_iattr->ia_ctime; @@ -1553,9 +1591,11 @@ void kernfs_remove(struct kernfs_node *kn) root = kernfs_root(kn); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); __kernfs_remove(kn); up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); } /** @@ -1646,6 +1686,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) bool ret; struct kernfs_root *root = kernfs_root(kn); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); kernfs_break_active_protection(kn); @@ -1675,7 +1716,9 @@ bool kernfs_remove_self(struct kernfs_node *kn) break; up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); schedule(); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); } finish_wait(waitq, &wait); @@ -1690,6 +1733,7 @@ bool kernfs_remove_self(struct kernfs_node *kn) kernfs_unbreak_active_protection(kn); up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); return ret; } @@ -1716,6 +1760,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, } root = kernfs_root(parent); + down_read(&root->kernfs_supers_rwsem); down_write(&root->kernfs_rwsem); kn = kernfs_find_ns(parent, name, ns); @@ -1726,6 +1771,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, } up_write(&root->kernfs_rwsem); + up_read(&root->kernfs_supers_rwsem); if (kn) return 0; From 2de27980e1d46e5dac586b1785edee7849a6e705 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 25 Feb 2026 14:34:04 -0800 Subject: [PATCH 08/67] selftests: memcg: Add tests for IN_DELETE_SELF and IN_IGNORED Add two new tests that verify inotify events are sent when memcg files or directories are removed with rmdir. 
Signed-off-by: T.J. Mercier Acked-by: Tejun Heo Acked-by: Amir Goldstein Tested-by: syzbot@syzkaller.appspotmail.com Link: https://patch.msgid.link/20260225223404.783173-4-tjmercier@google.com Signed-off-by: Greg Kroah-Hartman --- .../selftests/cgroup/test_memcontrol.c | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 2fb096a2a9f9..ea05a2524d0f 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -1643,6 +1644,115 @@ static int test_memcg_oom_group_score_events(const char *root) return ret; } +static int read_event(int inotify_fd, int expected_event, int expected_wd) +{ + struct inotify_event event; + ssize_t len = 0; + + len = read(inotify_fd, &event, sizeof(event)); + if (len < (ssize_t)sizeof(event)) + return -1; + + if (event.mask != expected_event || event.wd != expected_wd) { + fprintf(stderr, + "event does not match expected values: mask %d (expected %d) wd %d (expected %d)\n", + event.mask, expected_event, event.wd, expected_wd); + return -1; + } + + return 0; +} + +static int test_memcg_inotify_delete_file(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg = NULL; + int fd, wd; + + memcg = cg_name(root, "memcg_test_0"); + + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + fd = inotify_init1(0); + if (fd == -1) + goto cleanup; + + wd = inotify_add_watch(fd, cg_control(memcg, "memory.events"), IN_DELETE_SELF); + if (wd == -1) + goto cleanup; + + if (cg_destroy(memcg)) + goto cleanup; + free(memcg); + memcg = NULL; + + if (read_event(fd, IN_DELETE_SELF, wd)) + goto cleanup; + + if (read_event(fd, IN_IGNORED, wd)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (fd >= 0) + close(fd); + if (memcg) + cg_destroy(memcg); + free(memcg); + + return ret; +} 
+ +static int test_memcg_inotify_delete_dir(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg = NULL; + int fd, wd; + + memcg = cg_name(root, "memcg_test_0"); + + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + fd = inotify_init1(0); + if (fd == -1) + goto cleanup; + + wd = inotify_add_watch(fd, memcg, IN_DELETE_SELF); + if (wd == -1) + goto cleanup; + + if (cg_destroy(memcg)) + goto cleanup; + free(memcg); + memcg = NULL; + + if (read_event(fd, IN_DELETE_SELF, wd)) + goto cleanup; + + if (read_event(fd, IN_IGNORED, wd)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (fd >= 0) + close(fd); + if (memcg) + cg_destroy(memcg); + free(memcg); + + return ret; +} + #define T(x) { x, #x } struct memcg_test { int (*fn)(const char *root); @@ -1662,6 +1772,8 @@ struct memcg_test { T(test_memcg_oom_group_leaf_events), T(test_memcg_oom_group_parent_events), T(test_memcg_oom_group_score_events), + T(test_memcg_inotify_delete_file), + T(test_memcg_inotify_delete_dir), }; #undef T From 59621105ffca7a33955f56bc7dee0923992f5832 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:16 +0100 Subject: [PATCH 09/67] of: provide of_machine_read_compatible() Provide a helper function allowing users to read the compatible string of the machine, hiding the access to the root node. 
Reviewed-by: Christophe Leroy (CS GROUP) Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-1-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/of/base.c | 15 +++++++++++++++ include/linux/of.h | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 57420806c1a2..b70aec32e0e3 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -434,6 +434,21 @@ bool of_machine_compatible_match(const char *const *compats) } EXPORT_SYMBOL(of_machine_compatible_match); +/** + * of_machine_read_compatible - Get the compatible string of this machine + * @compatible: address at which the address of the compatible string will be + * stored + * @index: index of the compatible entry in the list + * + * Returns: + * 0 on success, negative error number on failure. + */ +int of_machine_read_compatible(const char **compatible, unsigned int index) +{ + return of_property_read_string_index(of_root, "compatible", index, compatible); +} +EXPORT_SYMBOL_GPL(of_machine_read_compatible); + /** * of_machine_device_match - Test root of device tree against a of_device_id array * @matches: NULL terminated array of of_device_id match structures to search in diff --git a/include/linux/of.h b/include/linux/of.h index be6ec4916adf..7df971d52b55 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -426,6 +426,8 @@ static inline bool of_machine_is_compatible(const char *compat) return of_machine_compatible_match(compats); } +int of_machine_read_compatible(const char **compatible, unsigned int index); + extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); extern int of_update_property(struct device_node *np, struct property *newprop); @@ -851,6 +853,12 @@ static inline int of_machine_is_compatible(const char *compat) return 0; } +static inline int 
of_machine_read_compatible(const char **compatible, + unsigned int index) +{ + return -ENOSYS; +} + static inline int of_add_property(struct device_node *np, struct property *prop) { return 0; From c86d3b7b847cc9b32a17117cfd71679e4315fd9f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:17 +0100 Subject: [PATCH 10/67] of: provide of_machine_read_model() Provide a helper function allowing users to read the model string of the machine, hiding the access to the root node. Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-2-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/of/base.c | 13 +++++++++++++ include/linux/of.h | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index b70aec32e0e3..bf4a51887d74 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -449,6 +449,19 @@ int of_machine_read_compatible(const char **compatible, unsigned int index) } EXPORT_SYMBOL_GPL(of_machine_read_compatible); +/** + * of_machine_read_model - Get the model string of this machine + * @model: address at which the address of the model string will be stored + * + * Returns: + * 0 on success, negative error number on failure. 
+ */ +int of_machine_read_model(const char **model) +{ + return of_property_read_string(of_root, "model", model); +} +EXPORT_SYMBOL_GPL(of_machine_read_model); + /** * of_machine_device_match - Test root of device tree against a of_device_id array * @matches: NULL terminated array of of_device_id match structures to search in diff --git a/include/linux/of.h b/include/linux/of.h index 7df971d52b55..2b95777f16f6 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -427,6 +427,7 @@ static inline bool of_machine_is_compatible(const char *compat) } int of_machine_read_compatible(const char **compatible, unsigned int index); +int of_machine_read_model(const char **model); extern int of_add_property(struct device_node *np, struct property *prop); extern int of_remove_property(struct device_node *np, struct property *prop); @@ -859,6 +860,11 @@ static inline int of_machine_read_compatible(const char **compatible, return -ENOSYS; } +static inline int of_machine_read_model(const char **model) +{ + return -ENOSYS; +} + static inline int of_add_property(struct device_node *np, struct property *prop) { return 0; From e06c3b137907ad93daab6ca7e63aa9b68b2486ea Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:18 +0100 Subject: [PATCH 11/67] base: soc: order includes alphabetically For easier readability and maintenance, order the included headers alphabetically. Reviewed-by: Christophe Leroy (CS GROUP) Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-3-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/soc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/base/soc.c b/drivers/base/soc.c index c8d3db9daa2f..48e2f0dbd330 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -5,16 +5,16 @@ * Author: Lee Jones for ST-Ericsson. 
*/ -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include static DEFINE_IDA(soc_ida); From 030706e954c10749da8c75464c6b02cb30cb00aa Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:19 +0100 Subject: [PATCH 12/67] base: soc: rename and export soc_device_get_machine() Some SoC drivers reimplement the functionality of soc_device_get_machine(). Make this function accessible through the sys_soc.h header and rename it to a more descriptive name. Reviewed-by: Christophe Leroy (CS GROUP) Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-4-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/soc.c | 13 +++++-------- include/linux/sys_soc.h | 10 ++++++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/base/soc.c b/drivers/base/soc.c index 48e2f0dbd330..65ce72d49230 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -111,17 +111,14 @@ static void soc_release(struct device *dev) kfree(soc_dev); } -static void soc_device_get_machine(struct soc_device_attribute *soc_dev_attr) +int soc_attr_read_machine(struct soc_device_attribute *soc_dev_attr) { - struct device_node *np; - if (soc_dev_attr->machine) - return; + return -EBUSY; - np = of_find_node_by_path("/"); - of_property_read_string(np, "model", &soc_dev_attr->machine); - of_node_put(np); + return of_machine_read_model(&soc_dev_attr->machine); } +EXPORT_SYMBOL_GPL(soc_attr_read_machine); static struct soc_device_attribute *early_soc_dev_attr; @@ -131,7 +128,7 @@ struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr const struct attribute_group **soc_attr_groups; int ret; - soc_device_get_machine(soc_dev_attr); + soc_attr_read_machine(soc_dev_attr); if (!soc_bus_registered) { if (early_soc_dev_attr) diff --git 
a/include/linux/sys_soc.h b/include/linux/sys_soc.h index d9b3cf0f410c..f19f5cec18e2 100644 --- a/include/linux/sys_soc.h +++ b/include/linux/sys_soc.h @@ -37,6 +37,16 @@ void soc_device_unregister(struct soc_device *soc_dev); */ struct device *soc_device_to_device(struct soc_device *soc); +/** + * soc_attr_read_machine - retrieve the machine model and store it in + * the soc_device_attribute structure + * @soc_dev_attr: SoC attribute structure to store the model in + * + * Returns: + * 0 on success, negative error number on failure. + */ +int soc_attr_read_machine(struct soc_device_attribute *soc_dev_attr); + #ifdef CONFIG_SOC_BUS const struct soc_device_attribute *soc_device_match( const struct soc_device_attribute *matches); From db0622ef4e65601489522c7bfe87409f4e60835c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:20 +0100 Subject: [PATCH 13/67] soc: fsl: guts: don't access of_root directly Don't access of_root directly as it reduces the build test coverage for this driver with COMPILE_TEST=y and OF=n. Use existing helper functions to retrieve the relevant information. 
Suggested-by: Rob Herring Reviewed-by: Christophe Leroy (CS GROUP) Acked-by: Christophe Leroy (CS GROUP) Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-5-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/soc/fsl/guts.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c index 40afb27b582b..9bee7baec2b9 100644 --- a/drivers/soc/fsl/guts.c +++ b/drivers/soc/fsl/guts.c @@ -186,7 +186,6 @@ static int __init fsl_guts_init(void) const struct fsl_soc_data *soc_data; const struct of_device_id *match; struct ccsr_guts __iomem *regs; - const char *machine = NULL; struct device_node *np; bool little_endian; u64 soc_uid = 0; @@ -217,13 +216,9 @@ static int __init fsl_guts_init(void) if (!soc_dev_attr) return -ENOMEM; - if (of_property_read_string(of_root, "model", &machine)) - of_property_read_string_index(of_root, "compatible", 0, &machine); - if (machine) { - soc_dev_attr->machine = kstrdup(machine, GFP_KERNEL); - if (!soc_dev_attr->machine) - goto err_nomem; - } + ret = soc_attr_read_machine(soc_dev_attr); + if (ret) + of_machine_read_compatible(&soc_dev_attr->machine, 0); soc_die = fsl_soc_die_match(svr, fsl_soc_die); if (soc_die) { @@ -267,7 +262,6 @@ static int __init fsl_guts_init(void) err_nomem: ret = -ENOMEM; err: - kfree(soc_dev_attr->machine); kfree(soc_dev_attr->family); kfree(soc_dev_attr->soc_id); kfree(soc_dev_attr->revision); From 2524b293a59e586afd06358d0b191ab57208a920 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:21 +0100 Subject: [PATCH 14/67] soc: imx8m: don't access of_root directly Don't access of_root directly as it reduces the build test coverage for this driver with COMPILE_TEST=y and OF=n. Use existing helper functions to retrieve the relevant information. 
Suggested-by: Rob Herring Signed-off-by: Bartosz Golaszewski Reviewed-by: Peng Fan Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-6-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/soc/imx/soc-imx8m.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/soc/imx/soc-imx8m.c b/drivers/soc/imx/soc-imx8m.c index 8e2322999f09..77763a107edb 100644 --- a/drivers/soc/imx/soc-imx8m.c +++ b/drivers/soc/imx/soc-imx8m.c @@ -226,7 +226,6 @@ static int imx8m_soc_probe(struct platform_device *pdev) const struct imx8_soc_data *data; struct imx8_soc_drvdata *drvdata; struct device *dev = &pdev->dev; - const struct of_device_id *id; struct soc_device *soc_dev; u32 soc_rev = 0; u64 soc_uid[2] = {0, 0}; @@ -244,15 +243,11 @@ static int imx8m_soc_probe(struct platform_device *pdev) soc_dev_attr->family = "Freescale i.MX"; - ret = of_property_read_string(of_root, "model", &soc_dev_attr->machine); + ret = soc_attr_read_machine(soc_dev_attr); if (ret) return ret; - id = of_match_node(imx8_soc_match, of_root); - if (!id) - return -ENODEV; - - data = id->data; + data = device_get_match_data(dev); if (data) { soc_dev_attr->soc_id = data->name; ret = imx8m_soc_prepare(pdev, data->ocotp_compatible); @@ -326,7 +321,7 @@ static int __init imx8_soc_init(void) int ret; /* No match means this is non-i.MX8M hardware, do nothing. */ - if (!of_match_node(imx8_soc_match, of_root)) + if (!of_machine_device_match(imx8_soc_match)) return 0; ret = platform_driver_register(&imx8m_soc_driver); From 01898f5ed659796bb3eba4bc3ac5177317942e24 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:22 +0100 Subject: [PATCH 15/67] soc: imx9: don't access of_root directly Don't access of_root directly as it reduces the build test coverage for this driver with COMPILE_TEST=y and OF=n. Use existing helper functions to retrieve the relevant information. 
Suggested-by: Rob Herring Reviewed-by: Peng Fan Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-7-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/soc/imx/soc-imx9.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/soc/imx/soc-imx9.c b/drivers/soc/imx/soc-imx9.c index d67bc7402b10..58eef7d4f908 100644 --- a/drivers/soc/imx/soc-imx9.c +++ b/drivers/soc/imx/soc-imx9.c @@ -30,7 +30,7 @@ static int imx9_soc_probe(struct platform_device *pdev) if (!attr) return -ENOMEM; - err = of_property_read_string(of_root, "model", &attr->machine); + err = soc_attr_read_machine(attr); if (err) return dev_err_probe(dev, err, "%s: missing model property\n", __func__); @@ -89,7 +89,7 @@ static int __init imx9_soc_init(void) struct platform_device *pdev; /* No match means it is not an i.MX 9 series SoC, do nothing. */ - if (!of_match_node(imx9_soc_match, of_root)) + if (!of_machine_device_match(imx9_soc_match)) return 0; ret = platform_driver_register(&imx9_soc_driver); From bb729bf1d6fdf5c2087c1651165c74cef0da1742 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Tue, 10 Mar 2026 23:57:53 +0800 Subject: [PATCH 16/67] driver core: Add conditional guard support for device_lock() Introduce conditional guard version of device_lock() for scenarios that require conditional device lock holding. 
Suggested-by: Dan Williams Reviewed-by: Dan Williams Acked-by: Greg Kroah-Hartman Signed-off-by: Li Ming Link: https://patch.msgid.link/20260310-fix_access_endpoint_without_drv_check-v1-1-94fe919a0b87@zohomail.com Signed-off-by: Danilo Krummrich --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/device.h b/include/linux/device.h index 0be95294b6e6..4fafee80524b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -911,6 +911,7 @@ static inline void device_unlock(struct device *dev) } DEFINE_GUARD(device, struct device *, device_lock(_T), device_unlock(_T)) +DEFINE_GUARD_COND(device, _intr, device_lock_interruptible(_T), _RET == 0) static inline void device_lock_assert(struct device *dev) { From 15949f153059275d70a5448a17e429af51e3560c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 23 Feb 2026 14:37:24 +0100 Subject: [PATCH 17/67] soc: sunxi: mbus: don't access of_root directly Don't access of_root directly as it reduces the build test coverage for this driver with COMPILE_TEST=y and OF=n. Use existing helper functions to retrieve the relevant information. 
Suggested-by: Rob Herring Acked-by: Jernej Skrabec Signed-off-by: Bartosz Golaszewski Reviewed-by: Rob Herring (Arm) Link: https://patch.msgid.link/20260223-soc-of-root-v2-9-b45da45903c8@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- drivers/soc/sunxi/sunxi_mbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/soc/sunxi/sunxi_mbus.c b/drivers/soc/sunxi/sunxi_mbus.c index 1734da357ca2..8bc5f62ff258 100644 --- a/drivers/soc/sunxi/sunxi_mbus.c +++ b/drivers/soc/sunxi/sunxi_mbus.c @@ -118,7 +118,7 @@ static const char * const sunxi_mbus_platforms[] __initconst = { static int __init sunxi_mbus_init(void) { - if (!of_device_compatible_match(of_root, sunxi_mbus_platforms)) + if (!of_machine_compatible_match(sunxi_mbus_platforms)) return 0; bus_register_notifier(&platform_bus_type, &sunxi_mbus_nb); From fe2511adb1fc1814df06ca11e0d8a92f792e4029 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 1 Mar 2026 13:30:17 +0100 Subject: [PATCH 18/67] sysfs: constify group arrays in function arguments Constify the groups array argument where applicable. This allows passing constant arrays as arguments. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/17035265-8882-4101-b7a7-16b3eb94f8b5@gmail.com Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/group.c | 10 +++++----- include/linux/sysfs.h | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index e1e639f515a0..b3edae0578c0 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -217,7 +217,7 @@ int sysfs_create_group(struct kobject *kobj, EXPORT_SYMBOL_GPL(sysfs_create_group); static int internal_create_groups(struct kobject *kobj, int update, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { int error = 0; int i; @@ -250,7 +250,7 @@ static int internal_create_groups(struct kobject *kobj, int update, * Returns 0 on success or error code from sysfs_create_group on failure.
*/ int sysfs_create_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { return internal_create_groups(kobj, 0, groups); } @@ -268,7 +268,7 @@ EXPORT_SYMBOL_GPL(sysfs_create_groups); * Returns 0 on success or error code from sysfs_update_group on failure. */ int sysfs_update_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { return internal_create_groups(kobj, 1, groups); } @@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(sysfs_remove_group); * If groups is not NULL, remove the specified groups from the kobject. */ void sysfs_remove_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { int i; @@ -613,7 +613,7 @@ EXPORT_SYMBOL_GPL(sysfs_group_change_owner); * Returns 0 on success or error code on failure. */ int sysfs_groups_change_owner(struct kobject *kobj, - const struct attribute_group **groups, + const struct attribute_group *const *groups, kuid_t kuid, kgid_t kgid) { int error = 0, i; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 99b775f3ff46..9777e9445dd5 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -445,15 +445,15 @@ void sysfs_delete_link(struct kobject *dir, struct kobject *targ, int __must_check sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp); int __must_check sysfs_create_groups(struct kobject *kobj, - const struct attribute_group **groups); + const struct attribute_group *const *groups); int __must_check sysfs_update_groups(struct kobject *kobj, - const struct attribute_group **groups); + const struct attribute_group *const *groups); int sysfs_update_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_group(struct kobject *kobj, const struct attribute_group *grp); void sysfs_remove_groups(struct kobject *kobj, - const struct attribute_group **groups); + const struct 
attribute_group *const *groups); int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group); void sysfs_remove_file_from_group(struct kobject *kobj, @@ -486,7 +486,7 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid); int sysfs_link_change_owner(struct kobject *kobj, struct kobject *targ, const char *name, kuid_t kuid, kgid_t kgid); int sysfs_groups_change_owner(struct kobject *kobj, - const struct attribute_group **groups, + const struct attribute_group *const *groups, kuid_t kuid, kgid_t kgid); int sysfs_group_change_owner(struct kobject *kobj, const struct attribute_group *groups, kuid_t kuid, @@ -629,13 +629,13 @@ static inline int sysfs_create_group(struct kobject *kobj, } static inline int sysfs_create_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { return 0; } static inline int sysfs_update_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { return 0; } @@ -652,7 +652,7 @@ static inline void sysfs_remove_group(struct kobject *kobj, } static inline void sysfs_remove_groups(struct kobject *kobj, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { } @@ -733,7 +733,7 @@ static inline int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t k } static inline int sysfs_groups_change_owner(struct kobject *kobj, - const struct attribute_group **groups, + const struct attribute_group *const *groups, kuid_t kuid, kgid_t kgid) { return 0; From ece5283706aff6791a37894bafbb0c134a94c0f3 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 1 Mar 2026 13:31:02 +0100 Subject: [PATCH 19/67] driver: core: constify groups array argument in device_add_groups and device_remove_groups Now that sysfs_create_groups() and sysfs_remove_groups() allow passing constant groups arrays, we can constify the groups array argument here as well.
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/8ea2d6d1-0adb-4d7f-92bc-751e93ce08d6@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 5 +++-- include/linux/device.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 791f9e444df8..f497b724332a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2831,14 +2831,15 @@ static ssize_t removable_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(removable); -int device_add_groups(struct device *dev, const struct attribute_group **groups) +int device_add_groups(struct device *dev, + const struct attribute_group *const *groups) { return sysfs_create_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_add_groups); void device_remove_groups(struct device *dev, - const struct attribute_group **groups) + const struct attribute_group *const *groups) { sysfs_remove_groups(&dev->kobj, groups); } diff --git a/include/linux/device.h b/include/linux/device.h index 0be95294b6e6..48a0444ccc1e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1131,9 +1131,9 @@ device_create_with_groups(const struct class *cls, struct device *parent, dev_t void device_destroy(const struct class *cls, dev_t devt); int __must_check device_add_groups(struct device *dev, - const struct attribute_group **groups); + const struct attribute_group *const *groups); void device_remove_groups(struct device *dev, - const struct attribute_group **groups); + const struct attribute_group *const *groups); static inline int __must_check device_add_group(struct device *dev, const struct attribute_group *grp) From 10f874dc92b3f3bf96470d997bdf157b289c9d4c Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 1 Mar 2026 13:31:56 +0100 Subject: [PATCH 20/67] driver core: make struct class groups members constant arrays Constify the groups arrays, allowing constant arrays to be assigned.
Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/7ff56b07-09ca-4948-b98f-5bd37ceef21e@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/class.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/device/class.h b/include/linux/device/class.h index 65880e60c720..2079239a5aa5 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -50,8 +50,8 @@ struct fwnode_handle; struct class { const char *name; - const struct attribute_group **class_groups; - const struct attribute_group **dev_groups; + const struct attribute_group *const *class_groups; + const struct attribute_group *const *dev_groups; int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(const struct device *dev, umode_t *mode); From 16de94a1b090864637c12bc6207e18d82d1972a1 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Fri, 13 Mar 2026 10:51:53 -0700 Subject: [PATCH 21/67] kernfs: Add missing documentation for kernfs_drain's drop_supers argument The drop_supers argument was added to kernfs_drain to control whether the kernfs_supers_rwsem is temporarily dropped along with the kernfs_rwsem, but no documentation was added for it. Fixes: eea5d2bb34ba ("kernfs: Send IN_DELETE_SELF and IN_IGNORED") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202603130112.2FcCzv1g-lkp@intel.com/ Signed-off-by: T.J. Mercier Link: https://patch.msgid.link/20260313175153.235681-1-tjmercier@google.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 715b651e35a0..7f355316704c 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -481,6 +481,8 @@ void kernfs_put_active(struct kernfs_node *kn) /** * kernfs_drain - drain kernfs_node * @kn: kernfs_node to drain + * @drop_supers: Set to true if this function is called with the + * kernfs_supers_rwsem locked.
* * Drain existing usages and nuke all existing mmaps of @kn. Multiple * removers may invoke this function concurrently on @kn and all will From e2d599021c843d97ee38ba351cb0117eb984e038 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 6 Feb 2026 15:00:15 +0900 Subject: [PATCH 22/67] rust: io: turn IoCapable into a functional trait `IoCapable<T>` is currently used as a marker trait to signal that the methods of the `Io` trait corresponding to `T` have been overridden by the implementor (the default implementations triggering a build-time error). This goes against the DRY principle and separates the signaling of the capability from its implementation, making it possible to forget a step while implementing a new `Io`. Another undesirable side-effect is that it makes the implementation of I/O backends boilerplate-y and convoluted: currently this is done using two levels of nested macros that generate unsafe code. Fix these issues by turning `IoCapable<T>` into a functional trait that includes the raw implementation of the I/O access for `T` using unsafe methods that work with an arbitrary address. This allows us to turn the default methods of `Io` into regular methods that check the passed offset, turn it into an address, and call into the corresponding `IoCapable` functions, removing the need to overload them at all. `IoCapable` must still be implemented for all supported primitive types, which is still done more concisely using a macro, but this macro becomes much simpler and does not require calling into another one.
Reviewed-by: Daniel Almeida Acked-by: Alice Ryhl Signed-off-by: Alexandre Courbot Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260206-io-v2-1-71dea20a06e6@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 169 ++++++++++++++++++++++++++++++++---------- rust/kernel/pci/io.rs | 37 ++++++++- 2 files changed, 163 insertions(+), 43 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index e5fba6bf6db0..ec78c614c959 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -320,14 +320,29 @@ const fn offset_valid(offset: usize, size: usize) -> bool { } } -/// Marker trait indicating that an I/O backend supports operations of a certain type. +/// Trait indicating that an I/O backend supports operations of a certain type and providing an +/// implementation for these operations. /// /// Different I/O backends can implement this trait to expose only the operations they support. /// /// For example, a PCI configuration space may implement `IoCapable`, `IoCapable`, /// and `IoCapable`, but not `IoCapable`, while an MMIO region on a 64-bit /// system might implement all four. -pub trait IoCapable {} +pub trait IoCapable { + /// Performs an I/O read of type `T` at `address` and returns the result. + /// + /// # Safety + /// + /// The range `[address..address + size_of::()]` must be within the bounds of `Self`. + unsafe fn io_read(&self, address: usize) -> T; + + /// Performs an I/O write of `value` at `address`. + /// + /// # Safety + /// + /// The range `[address..address + size_of::()]` must be within the bounds of `Self`. + unsafe fn io_write(&self, value: T, address: usize); +} /// Types implementing this trait (e.g. MMIO BARs or PCI config regions) /// can perform I/O operations on regions of memory. @@ -369,146 +384,198 @@ fn io_addr(&self, offset: usize) -> Result { /// Fallible 8-bit read with runtime bounds check. 
#[inline(always)] - fn try_read8(&self, _offset: usize) -> Result + fn try_read8(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 8-bit read") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. + Ok(unsafe { self.io_read(address) }) } /// Fallible 16-bit read with runtime bounds check. #[inline(always)] - fn try_read16(&self, _offset: usize) -> Result + fn try_read16(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 16-bit read") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. + Ok(unsafe { self.io_read(address) }) } /// Fallible 32-bit read with runtime bounds check. #[inline(always)] - fn try_read32(&self, _offset: usize) -> Result + fn try_read32(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 32-bit read") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. + Ok(unsafe { self.io_read(address) }) } /// Fallible 64-bit read with runtime bounds check. #[inline(always)] - fn try_read64(&self, _offset: usize) -> Result + fn try_read64(&self, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 64-bit read") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. + Ok(unsafe { self.io_read(address) }) } /// Fallible 8-bit write with runtime bounds check. #[inline(always)] - fn try_write8(&self, _value: u8, _offset: usize) -> Result + fn try_write8(&self, value: u8, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 8-bit write") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. 
+ unsafe { self.io_write(value, address) }; + Ok(()) } /// Fallible 16-bit write with runtime bounds check. #[inline(always)] - fn try_write16(&self, _value: u16, _offset: usize) -> Result + fn try_write16(&self, value: u16, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 16-bit write") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. + unsafe { self.io_write(value, address) }; + Ok(()) } /// Fallible 32-bit write with runtime bounds check. #[inline(always)] - fn try_write32(&self, _value: u32, _offset: usize) -> Result + fn try_write32(&self, value: u32, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 32-bit write") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. + unsafe { self.io_write(value, address) }; + Ok(()) } /// Fallible 64-bit write with runtime bounds check. #[inline(always)] - fn try_write64(&self, _value: u64, _offset: usize) -> Result + fn try_write64(&self, value: u64, offset: usize) -> Result where Self: IoCapable, { - build_error!("Backend does not support fallible 64-bit write") + let address = self.io_addr::(offset)?; + + // SAFETY: `address` has been validated by `io_addr`. + unsafe { self.io_write(value, address) }; + Ok(()) } /// Infallible 8-bit read with compile-time bounds check. #[inline(always)] - fn read8(&self, _offset: usize) -> u8 + fn read8(&self, offset: usize) -> u8 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 8-bit read") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_read(address) } } /// Infallible 16-bit read with compile-time bounds check. 
#[inline(always)] - fn read16(&self, _offset: usize) -> u16 + fn read16(&self, offset: usize) -> u16 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 16-bit read") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_read(address) } } /// Infallible 32-bit read with compile-time bounds check. #[inline(always)] - fn read32(&self, _offset: usize) -> u32 + fn read32(&self, offset: usize) -> u32 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 32-bit read") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_read(address) } } /// Infallible 64-bit read with compile-time bounds check. #[inline(always)] - fn read64(&self, _offset: usize) -> u64 + fn read64(&self, offset: usize) -> u64 where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 64-bit read") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_read(address) } } /// Infallible 8-bit write with compile-time bounds check. #[inline(always)] - fn write8(&self, _value: u8, _offset: usize) + fn write8(&self, value: u8, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 8-bit write") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(value, address) } } /// Infallible 16-bit write with compile-time bounds check. 
#[inline(always)] - fn write16(&self, _value: u16, _offset: usize) + fn write16(&self, value: u16, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 16-bit write") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(value, address) } } /// Infallible 32-bit write with compile-time bounds check. #[inline(always)] - fn write32(&self, _value: u32, _offset: usize) + fn write32(&self, value: u32, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 32-bit write") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(value, address) } } /// Infallible 64-bit write with compile-time bounds check. #[inline(always)] - fn write64(&self, _value: u64, _offset: usize) + fn write64(&self, value: u64, offset: usize) where Self: IoKnownSize + IoCapable, { - build_error!("Backend does not support infallible 64-bit write") + let address = self.io_addr_assert::(offset); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(value, address) } } } @@ -534,14 +601,36 @@ fn io_addr_assert(&self, offset: usize) -> usize { } } -// MMIO regions support 8, 16, and 32-bit accesses. -impl IoCapable for Mmio {} -impl IoCapable for Mmio {} -impl IoCapable for Mmio {} +/// Implements [`IoCapable`] on `$mmio` for `$ty` using `$read_fn` and `$write_fn`. +macro_rules! impl_mmio_io_capable { + ($mmio:ident, $(#[$attr:meta])* $ty:ty, $read_fn:ident, $write_fn:ident) => { + $(#[$attr])* + impl IoCapable<$ty> for $mmio { + unsafe fn io_read(&self, address: usize) -> $ty { + // SAFETY: By the trait invariant `address` is a valid address for MMIO operations. 
+ unsafe { bindings::$read_fn(address as *const c_void) } + } + unsafe fn io_write(&self, value: $ty, address: usize) { + // SAFETY: By the trait invariant `address` is a valid address for MMIO operations. + unsafe { bindings::$write_fn(value, address as *mut c_void) } + } + } + }; +} + +// MMIO regions support 8, 16, and 32-bit accesses. +impl_mmio_io_capable!(Mmio, u8, readb, writeb); +impl_mmio_io_capable!(Mmio, u16, readw, writew); +impl_mmio_io_capable!(Mmio, u32, readl, writel); // MMIO regions on 64-bit systems also support 64-bit accesses. -#[cfg(CONFIG_64BIT)] -impl IoCapable for Mmio {} +impl_mmio_io_capable!( + Mmio, + #[cfg(CONFIG_64BIT)] + u64, + readq, + writeq +); impl Io for Mmio { /// Returns the base address of this mapping. diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs index fb6edab2aea7..4feca8033eb4 100644 --- a/rust/kernel/pci/io.rs +++ b/rust/kernel/pci/io.rs @@ -142,10 +142,41 @@ macro_rules! call_config_write { }; } +/// Implements [`IoCapable`] on [`ConfigSpace`] for `$ty` using `$read_fn` and `$write_fn`. +macro_rules! impl_config_space_io_capable { + ($ty:ty, $read_fn:ident, $write_fn:ident) => { + impl<'a, S: ConfigSpaceKind> IoCapable<$ty> for ConfigSpace<'a, S> { + unsafe fn io_read(&self, address: usize) -> $ty { + let mut val: $ty = 0; + + // Return value from C function is ignored in infallible accessors. + let _ret = + // SAFETY: By the type invariant `self.pdev` is a valid address. + // CAST: The offset is cast to `i32` because the C functions expect a 32-bit + // signed offset parameter. PCI configuration space size is at most 4096 bytes, + // so the value always fits within `i32` without truncation or sign change. + unsafe { bindings::$read_fn(self.pdev.as_raw(), address as i32, &mut val) }; + + val + } + + unsafe fn io_write(&self, value: $ty, address: usize) { + // Return value from C function is ignored in infallible accessors. + let _ret = + // SAFETY: By the type invariant `self.pdev` is a valid address. 
+ // CAST: The offset is cast to `i32` because the C functions expect a 32-bit + // signed offset parameter. PCI configuration space size is at most 4096 bytes, + // so the value always fits within `i32` without truncation or sign change. + unsafe { bindings::$write_fn(self.pdev.as_raw(), address as i32, value) }; + } + } + }; +} + // PCI configuration space supports 8, 16, and 32-bit accesses. -impl<'a, S: ConfigSpaceKind> IoCapable for ConfigSpace<'a, S> {} -impl<'a, S: ConfigSpaceKind> IoCapable for ConfigSpace<'a, S> {} -impl<'a, S: ConfigSpaceKind> IoCapable for ConfigSpace<'a, S> {} +impl_config_space_io_capable!(u8, pci_read_config_byte, pci_write_config_byte); +impl_config_space_io_capable!(u16, pci_read_config_word, pci_write_config_word); +impl_config_space_io_capable!(u32, pci_read_config_dword, pci_write_config_dword); impl<'a, S: ConfigSpaceKind> Io for ConfigSpace<'a, S> { /// Returns the base address of the I/O region. It is always 0 for configuration space. From 19103d4f93673c804ef82dd797cd2b935d0bf70f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 6 Feb 2026 15:00:16 +0900 Subject: [PATCH 23/67] rust: io: mem: use non-relaxed I/O ops in examples The `_relaxed` I/O variant methods are about to be replaced by a wrapper type exposing this access pattern with the regular methods of the `Io` trait. Thus replace the examples to use the regular I/O methods. Since these are examples, we want them to use the most standard ops anyway, and the relaxed variants were but an addition that was MMIO-specific. 
Reviewed-by: Daniel Almeida Acked-by: Alice Ryhl Signed-off-by: Alexandre Courbot Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260206-io-v2-2-71dea20a06e6@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/io/mem.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rust/kernel/io/mem.rs b/rust/kernel/io/mem.rs index 620022cff401..7dc78d547f7a 100644 --- a/rust/kernel/io/mem.rs +++ b/rust/kernel/io/mem.rs @@ -54,6 +54,7 @@ pub(crate) unsafe fn new(device: &'a Device, resource: &'a Resource) -> S /// use kernel::{ /// bindings, /// device::Core, + /// io::Io, /// of, /// platform, /// }; @@ -78,9 +79,9 @@ pub(crate) unsafe fn new(device: &'a Device, resource: &'a Resource) -> S /// let io = iomem.access(pdev.as_ref())?; /// /// // Read and write a 32-bit value at `offset`. - /// let data = io.read32_relaxed(offset); + /// let data = io.read32(offset); /// - /// io.write32_relaxed(data, offset); + /// io.write32(data, offset); /// /// # Ok(SampleDriver) /// } @@ -117,6 +118,7 @@ pub fn iomap_exclusive_sized( /// use kernel::{ /// bindings, /// device::Core, + /// io::Io, /// of, /// platform, /// }; @@ -141,9 +143,9 @@ pub fn iomap_exclusive_sized( /// /// let io = iomem.access(pdev.as_ref())?; /// - /// let data = io.try_read32_relaxed(offset)?; + /// let data = io.try_read32(offset)?; /// - /// io.try_write32_relaxed(data, offset)?; + /// io.try_write32(data, offset)?; /// /// # Ok(SampleDriver) /// } From 1d1c5c73d7e8f166b6b55ae06a3c509561b854cd Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 6 Feb 2026 15:00:17 +0900 Subject: [PATCH 24/67] rust: io: provide Mmio relaxed ops through a wrapper type Relaxed I/O accessors for `Mmio` are currently implemented as an extra set of methods that mirror the ones defined in `Io`, but with the `_relaxed` suffix. This makes these methods impossible to use with generic code, which is a highly plausible proposition now that we have the `Io` trait. 
Address this by adding a new `RelaxedMmio` wrapper type for `Mmio` that provides its own `IoCapable` implementations relying on the relaxed C accessors. This makes it possible to use relaxed operations on a `Mmio` simply by wrapping it, and to use `RelaxedMmio` in code generic against `Io`. Acked-by: Alice Ryhl Signed-off-by: Alexandre Courbot Reviewed-by: Daniel Almeida Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260206-io-v2-3-71dea20a06e6@nvidia.com [ Use kernel import style in examples. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index ec78c614c959..8a0e35070153 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -742,3 +742,69 @@ pub unsafe fn from_raw(raw: &MmioRaw) -> &Self { call_mmio_write(writeq_relaxed) <- u64 ); } + +/// [`Mmio`] wrapper using relaxed accessors. +/// +/// This type provides an implementation of [`Io`] that uses relaxed I/O MMIO operands instead of +/// the regular ones. +/// +/// See [`Mmio::relaxed`] for a usage example. +#[repr(transparent)] +pub struct RelaxedMmio(Mmio); + +impl Io for RelaxedMmio { + #[inline] + fn addr(&self) -> usize { + self.0.addr() + } + + #[inline] + fn maxsize(&self) -> usize { + self.0.maxsize() + } +} + +impl IoKnownSize for RelaxedMmio { + const MIN_SIZE: usize = SIZE; +} + +impl Mmio { + /// Returns a [`RelaxedMmio`] reference that performs relaxed I/O operations. + /// + /// Relaxed accessors do not provide ordering guarantees with respect to DMA or memory accesses + /// and can be used when such ordering is not required. + /// + /// # Examples + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// RelaxedMmio, + /// }; + /// + /// fn do_io(io: &Mmio<0x100>) { + /// // The access is performed using `readl_relaxed` instead of `readl`. 
+ /// let v = io.relaxed().read32(0x10); + /// } + /// + /// ``` + pub fn relaxed(&self) -> &RelaxedMmio { + // SAFETY: `RelaxedMmio` is `#[repr(transparent)]` over `Mmio`, so `Mmio` and + // `RelaxedMmio` have identical layout. + unsafe { core::mem::transmute(self) } + } +} + +// MMIO regions support 8, 16, and 32-bit accesses. +impl_mmio_io_capable!(RelaxedMmio, u8, readb_relaxed, writeb_relaxed); +impl_mmio_io_capable!(RelaxedMmio, u16, readw_relaxed, writew_relaxed); +impl_mmio_io_capable!(RelaxedMmio, u32, readl_relaxed, writel_relaxed); +// MMIO regions on 64-bit systems also support 64-bit accesses. +impl_mmio_io_capable!( + RelaxedMmio, + #[cfg(CONFIG_64BIT)] + u64, + readq_relaxed, + writeq_relaxed +); From e385eb0d1c2c4d2dbc48d1bcbc44fd43cbb154a4 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 6 Feb 2026 15:00:18 +0900 Subject: [PATCH 25/67] rust: io: remove legacy relaxed accessors of Mmio The relaxed access functionality is now provided by the `RelaxedMmio` wrapper type, and we don't have any user of the legacy methods left. Remove them. Reviewed-by: Daniel Almeida Acked-by: Alice Ryhl Signed-off-by: Alexandre Courbot Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260206-io-v2-4-71dea20a06e6@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index 8a0e35070153..0d946e0c5d4f 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -701,46 +701,6 @@ pub unsafe fn from_raw(raw: &MmioRaw) -> &Self { // SAFETY: `Mmio` is a transparent wrapper around `MmioRaw`. 
unsafe { &*core::ptr::from_ref(raw).cast() } } - - io_define_read!(infallible, pub read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - io_define_read!(infallible, pub read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - io_define_read!(infallible, pub read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - io_define_read!( - infallible, - #[cfg(CONFIG_64BIT)] - pub read64_relaxed, - call_mmio_read(readq_relaxed) -> u64 - ); - - io_define_read!(fallible, pub try_read8_relaxed, call_mmio_read(readb_relaxed) -> u8); - io_define_read!(fallible, pub try_read16_relaxed, call_mmio_read(readw_relaxed) -> u16); - io_define_read!(fallible, pub try_read32_relaxed, call_mmio_read(readl_relaxed) -> u32); - io_define_read!( - fallible, - #[cfg(CONFIG_64BIT)] - pub try_read64_relaxed, - call_mmio_read(readq_relaxed) -> u64 - ); - - io_define_write!(infallible, pub write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - io_define_write!(infallible, pub write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - io_define_write!(infallible, pub write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - io_define_write!( - infallible, - #[cfg(CONFIG_64BIT)] - pub write64_relaxed, - call_mmio_write(writeq_relaxed) <- u64 - ); - - io_define_write!(fallible, pub try_write8_relaxed, call_mmio_write(writeb_relaxed) <- u8); - io_define_write!(fallible, pub try_write16_relaxed, call_mmio_write(writew_relaxed) <- u16); - io_define_write!(fallible, pub try_write32_relaxed, call_mmio_write(writel_relaxed) <- u32); - io_define_write!( - fallible, - #[cfg(CONFIG_64BIT)] - pub try_write64_relaxed, - call_mmio_write(writeq_relaxed) <- u64 - ); } /// [`Mmio`] wrapper using relaxed accessors. 
From 50aad5510fbbf8dd8f5f63380e1a1e7ae73216c4 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 6 Feb 2026 15:00:19 +0900 Subject: [PATCH 26/67] rust: pci: io: remove overloaded Io methods of ConfigSpace Since `ConfigSpace` now has the relevant implementations of `IoCapable`, the default methods of `Io` can be used in place of the overloaded ones. Remove them as well as the macros generating them. Reviewed-by: Daniel Almeida Acked-by: Alice Ryhl Signed-off-by: Alexandre Courbot Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260206-io-v2-5-71dea20a06e6@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 4 --- rust/kernel/pci/io.rs | 70 ------------------------------------------- 2 files changed, 74 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index 0d946e0c5d4f..2ae2420be344 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -215,7 +215,6 @@ macro_rules! call_mmio_write { /// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the /// `$call_macro`. /// * `$type_name:ty` - The Rust type of the value being read (e.g., `u8`, `u32`). -#[macro_export] macro_rules! io_define_read { (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) -> $type_name:ty) => { @@ -249,7 +248,6 @@ macro_rules! io_define_read { } }; } -pub use io_define_read; /// Generates an accessor method for writing to an I/O backend. /// @@ -274,7 +272,6 @@ macro_rules! io_define_read { /// `$call_macro`. /// * `$type_name:ty` - The Rust type of the value being written (e.g., `u8`, `u32`). Note the use /// of `<-` before the type to denote a write operation. -#[macro_export] macro_rules! io_define_write { (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <- $type_name:ty) => { @@ -306,7 +303,6 @@ macro_rules! io_define_write { } }; } -pub use io_define_write; /// Checks whether an access of type `U` at the given `offset` /// is valid within this region. 
diff --git a/rust/kernel/pci/io.rs b/rust/kernel/pci/io.rs index 4feca8033eb4..ae78676c927f 100644 --- a/rust/kernel/pci/io.rs +++ b/rust/kernel/pci/io.rs @@ -8,8 +8,6 @@ device, devres::Devres, io::{ - io_define_read, - io_define_write, Io, IoCapable, IoKnownSize, @@ -85,63 +83,6 @@ pub struct ConfigSpace<'a, S: ConfigSpaceKind = Extended> { _marker: PhantomData, } -/// Internal helper macros used to invoke C PCI configuration space read functions. -/// -/// This macro is intended to be used by higher-level PCI configuration space access macros -/// (io_define_read) and provides a unified expansion for infallible vs. fallible read semantics. It -/// emits a direct call into the corresponding C helper and performs the required cast to the Rust -/// return type. -/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the PCI configuration space write. -/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the value to read. -/// * `$addr` – The PCI configuration space offset to read. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_config_read { - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr) => {{ - let mut val: $ty = 0; - // SAFETY: By the type invariant `$self.pdev` is a valid address. - // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset - // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits - // within `i32` without truncation or sign change. - // Return value from C function is ignored in infallible accessors. - let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, &mut val) }; - val - }}; -} - -/// Internal helper macros used to invoke C PCI configuration space write functions. 
-/// -/// This macro is intended to be used by higher-level PCI configuration space access macros -/// (io_define_write) and provides a unified expansion for infallible vs. fallible read semantics. -/// It emits a direct call into the corresponding C helper and performs the required cast to the -/// Rust return type. -/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the PCI configuration space write. -/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the written value. -/// * `$addr` – The configuration space offset to write. -/// * `$value` – The value to write. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_config_write { - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => { - // SAFETY: By the type invariant `$self.pdev` is a valid address. - // CAST: The offset is cast to `i32` because the C functions expect a 32-bit signed offset - // parameter. PCI configuration space size is at most 4096 bytes, so the value always fits - // within `i32` without truncation or sign change. - // Return value from C function is ignored in infallible accessors. - let _ret = unsafe { bindings::$c_fn($self.pdev.as_raw(), $addr as i32, $value) }; - }; -} - /// Implements [`IoCapable`] on [`ConfigSpace`] for `$ty` using `$read_fn` and `$write_fn`. macro_rules! impl_config_space_io_capable { ($ty:ty, $read_fn:ident, $write_fn:ident) => { @@ -190,17 +131,6 @@ fn addr(&self) -> usize { fn maxsize(&self) -> usize { self.pdev.cfg_size().into_raw() } - - // PCI configuration space does not support fallible operations. - // The default implementations from the Io trait are not used. 
- - io_define_read!(infallible, read8, call_config_read(pci_read_config_byte) -> u8); - io_define_read!(infallible, read16, call_config_read(pci_read_config_word) -> u16); - io_define_read!(infallible, read32, call_config_read(pci_read_config_dword) -> u32); - - io_define_write!(infallible, write8, call_config_write(pci_write_config_byte) <- u8); - io_define_write!(infallible, write16, call_config_write(pci_write_config_word) <- u16); - io_define_write!(infallible, write32, call_config_write(pci_write_config_dword) <- u32); } impl<'a, S: ConfigSpaceKind> IoKnownSize for ConfigSpace<'a, S> { From 6291ee23da4224a7584ece2d292104e872b9b5fc Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 6 Feb 2026 15:00:20 +0900 Subject: [PATCH 27/67] rust: io: remove overloaded Io methods of Mmio Since `Mmio` now has the relevant implementations of `IoCapable`, the default methods of `Io` can be used in place of the overloaded ones. Remove them as well as the macros generating them. Reviewed-by: Daniel Almeida Acked-by: Alice Ryhl Signed-off-by: Alexandre Courbot Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260206-io-v2-6-71dea20a06e6@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 207 ---------------------------------------------- 1 file changed, 207 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index 2ae2420be344..947eb378d297 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -137,173 +137,6 @@ pub fn maxsize(&self) -> usize { #[repr(transparent)] pub struct Mmio(MmioRaw); -/// Internal helper macros used to invoke C MMIO read functions. -/// -/// This macro is intended to be used by higher-level MMIO access macros (io_define_read) and -/// provides a unified expansion for infallible vs. fallible read semantics. It emits a direct call -/// into the corresponding C helper and performs the required cast to the Rust return type. -/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the MMIO read. 
-/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the value to be read. -/// * `$addr` – The MMIO address to read. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_mmio_read { - (infallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => { - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn($addr as *const c_void) as $type } - }; - - (fallible, $c_fn:ident, $self:ident, $type:ty, $addr:expr) => {{ - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - Ok(unsafe { bindings::$c_fn($addr as *const c_void) as $type }) - }}; -} - -/// Internal helper macros used to invoke C MMIO write functions. -/// -/// This macro is intended to be used by higher-level MMIO access macros (io_define_write) and -/// provides a unified expansion for infallible vs. fallible write semantics. It emits a direct call -/// into the corresponding C helper and performs the required cast to the Rust return type. -/// -/// # Parameters -/// -/// * `$c_fn` – The C function performing the MMIO write. -/// * `$self` – The I/O backend object. -/// * `$ty` – The type of the written value. -/// * `$addr` – The MMIO address to write. -/// * `$value` – The value to write. -/// -/// This macro does not perform any validation; all invariants must be upheld by the higher-level -/// abstraction invoking it. -macro_rules! call_mmio_write { - (infallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => { - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. - unsafe { bindings::$c_fn($value, $addr as *mut c_void) } - }; - - (fallible, $c_fn:ident, $self:ident, $ty:ty, $addr:expr, $value:expr) => {{ - // SAFETY: By the type invariant `addr` is a valid address for MMIO operations. 
- unsafe { bindings::$c_fn($value, $addr as *mut c_void) }; - Ok(()) - }}; -} - -/// Generates an accessor method for reading from an I/O backend. -/// -/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked -/// (infallible) or runtime bounds-checked (fallible) read methods. It abstracts the address -/// calculation and bounds checking, and delegates the actual I/O read operation to a specified -/// helper macro, making it generic over different I/O backends. -/// -/// # Parameters -/// -/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on -/// `IoKnownSize` for compile-time checks and returns the value directly. `fallible` performs -/// runtime checks against `maxsize()` and returns a `Result`. -/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., -/// `#[cfg(CONFIG_64BIT)]` or inline directives). -/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). -/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `read32`, -/// `try_read8`). -/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call -/// (e.g., `call_mmio_read`). -/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the -/// `$call_macro`. -/// * `$type_name:ty` - The Rust type of the value being read (e.g., `u8`, `u32`). -macro_rules! io_define_read { - (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) -> - $type_name:ty) => { - /// Read IO data from a given offset known at compile time. - /// - /// Bound checks are performed on compile time, hence if the offset is not known at compile - /// time, the build will fail. - $(#[$attr])* - // Always inline to optimize out error path of `io_addr_assert`. 
- #[inline(always)] - $vis fn $name(&self, offset: usize) -> $type_name { - let addr = self.io_addr_assert::<$type_name>(offset); - - // SAFETY: By the type invariant `addr` is a valid address for IO operations. - $call_macro!(infallible, $c_fn, self, $type_name, addr) - } - }; - - (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) -> - $type_name:ty) => { - /// Read IO data from a given offset. - /// - /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is - /// out of bounds. - $(#[$attr])* - $vis fn $try_name(&self, offset: usize) -> Result<$type_name> { - let addr = self.io_addr::<$type_name>(offset)?; - - // SAFETY: By the type invariant `addr` is a valid address for IO operations. - $call_macro!(fallible, $c_fn, self, $type_name, addr) - } - }; -} - -/// Generates an accessor method for writing to an I/O backend. -/// -/// This macro reduces boilerplate by automatically generating either compile-time bounds-checked -/// (infallible) or runtime bounds-checked (fallible) write methods. It abstracts the address -/// calculation and bounds checking, and delegates the actual I/O write operation to a specified -/// helper macro, making it generic over different I/O backends. -/// -/// # Parameters -/// -/// * `infallible` / `fallible` - Determines the bounds-checking strategy. `infallible` relies on -/// `IoKnownSize` for compile-time checks and returns `()`. `fallible` performs runtime checks -/// against `maxsize()` and returns a `Result`. -/// * `$(#[$attr:meta])*` - Optional attributes to apply to the generated method (e.g., -/// `#[cfg(CONFIG_64BIT)]` or inline directives). -/// * `$vis:vis` - The visibility of the generated method (e.g., `pub`). -/// * `$name:ident` / `$try_name:ident` - The name of the generated method (e.g., `write32`, -/// `try_write8`). -/// * `$call_macro:ident` - The backend-specific helper macro used to emit the actual I/O call -/// (e.g., `call_mmio_write`). 
-/// * `$c_fn:ident` - The backend-specific C function or identifier to be passed into the -/// `$call_macro`. -/// * `$type_name:ty` - The Rust type of the value being written (e.g., `u8`, `u32`). Note the use -/// of `<-` before the type to denote a write operation. -macro_rules! io_define_write { - (infallible, $(#[$attr:meta])* $vis:vis $name:ident, $call_macro:ident($c_fn:ident) <- - $type_name:ty) => { - /// Write IO data from a given offset known at compile time. - /// - /// Bound checks are performed on compile time, hence if the offset is not known at compile - /// time, the build will fail. - $(#[$attr])* - // Always inline to optimize out error path of `io_addr_assert`. - #[inline(always)] - $vis fn $name(&self, value: $type_name, offset: usize) { - let addr = self.io_addr_assert::<$type_name>(offset); - - $call_macro!(infallible, $c_fn, self, $type_name, addr, value); - } - }; - - (fallible, $(#[$attr:meta])* $vis:vis $try_name:ident, $call_macro:ident($c_fn:ident) <- - $type_name:ty) => { - /// Write IO data from a given offset. - /// - /// Bound checks are performed on runtime, it fails if the offset (plus the type size) is - /// out of bounds. - $(#[$attr])* - $vis fn $try_name(&self, value: $type_name, offset: usize) -> Result { - let addr = self.io_addr::<$type_name>(offset)?; - - $call_macro!(fallible, $c_fn, self, $type_name, addr, value) - } - }; -} - /// Checks whether an access of type `U` at the given `offset` /// is valid within this region. 
#[inline] @@ -640,46 +473,6 @@ fn addr(&self) -> usize { fn maxsize(&self) -> usize { self.0.maxsize() } - - io_define_read!(fallible, try_read8, call_mmio_read(readb) -> u8); - io_define_read!(fallible, try_read16, call_mmio_read(readw) -> u16); - io_define_read!(fallible, try_read32, call_mmio_read(readl) -> u32); - io_define_read!( - fallible, - #[cfg(CONFIG_64BIT)] - try_read64, - call_mmio_read(readq) -> u64 - ); - - io_define_write!(fallible, try_write8, call_mmio_write(writeb) <- u8); - io_define_write!(fallible, try_write16, call_mmio_write(writew) <- u16); - io_define_write!(fallible, try_write32, call_mmio_write(writel) <- u32); - io_define_write!( - fallible, - #[cfg(CONFIG_64BIT)] - try_write64, - call_mmio_write(writeq) <- u64 - ); - - io_define_read!(infallible, read8, call_mmio_read(readb) -> u8); - io_define_read!(infallible, read16, call_mmio_read(readw) -> u16); - io_define_read!(infallible, read32, call_mmio_read(readl) -> u32); - io_define_read!( - infallible, - #[cfg(CONFIG_64BIT)] - read64, - call_mmio_read(readq) -> u64 - ); - - io_define_write!(infallible, write8, call_mmio_write(writeb) <- u8); - io_define_write!(infallible, write16, call_mmio_write(writew) <- u16); - io_define_write!(infallible, write32, call_mmio_write(writel) <- u32); - io_define_write!( - infallible, - #[cfg(CONFIG_64BIT)] - write64, - call_mmio_write(writeq) <- u64 - ); } impl IoKnownSize for Mmio { From 3cc319d5f433a4d560cc944ecfb1fe50b866cd66 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:11 +0900 Subject: [PATCH 28/67] rust: enable the `generic_arg_infer` feature This feature is stable since 1.89, and used in subsequent patches. Reviewed-by: Gary Guo Tested-by: Dirk Behme Acked-by: Miguel Ojeda Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-1-86805b2f7e9d@nvidia.com [ Resolve merge conflict. 
- Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/lib.rs | 3 +++ scripts/Makefile.build | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index d93292d47420..34b924819288 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -41,6 +41,9 @@ // Stable since Rust 1.84.0. #![feature(strict_provenance)] // +// Stable since Rust 1.89.0. +#![feature(generic_arg_infer)] +// // Expected to become stable. #![feature(arbitrary_self_types)] // diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3652b85be545..010d08472fb2 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -316,12 +316,13 @@ $(obj)/%.lst: $(obj)/%.c FORCE # `feature(offset_of_nested)`, `feature(raw_ref_op)`. # - Stable since Rust 1.84.0: `feature(strict_provenance)`. # - Stable since Rust 1.87.0: `feature(asm_goto)`. +# - Stable since Rust 1.89.0: `feature(generic_arg_infer)`. # - Expected to become stable: `feature(arbitrary_self_types)`. # - To be determined: `feature(used_with_arg)`. # # Please see https://github.com/Rust-for-Linux/linux/issues/2 for details on # the unstable features in use. -rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg +rust_allowed_features := asm_const,asm_goto,arbitrary_self_types,generic_arg_infer,lint_reasons,offset_of_nested,raw_ref_op,slice_ptr_len,strict_provenance,used_with_arg # `--out-dir` is required to avoid temporaries being created by `rustc` in the # current working directory, which may be not accessible in the out-of-tree From c59a2d14cd248c77457b821b15c72e6a6a268553 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:12 +0900 Subject: [PATCH 29/67] rust: num: add `shr` and `shl` methods to `Bounded` Shifting a `Bounded` left or right changes the number of bits required to represent the value. 
Add methods that perform the shift and return a `Bounded` with the appropriately adjusted bit width. These methods are particularly useful for bitfield extraction. Suggested-by: Alice Ryhl Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Reviewed-by: Daniel Almeida Tested-by: Dirk Behme Acked-by: Miguel Ojeda Acked-by: Yury Norov Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-2-86805b2f7e9d@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/num/bounded.rs | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index fa81acbdc8c2..2f5f13ecd3d6 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -473,6 +473,48 @@ pub fn cast(self) -> Bounded // `N` bits, and with the same signedness. unsafe { Bounded::__new(value) } } + + /// Right-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >= + /// N - SHIFT`. + /// + /// # Examples + /// + /// ``` + /// use kernel::num::Bounded; + /// + /// let v = Bounded::::new::<0xff00>(); + /// let v_shifted: Bounded:: = v.shr::<8, _>(); + /// + /// assert_eq!(v_shifted.get(), 0xff); + /// ``` + pub fn shr(self) -> Bounded { + const { assert!(RES + SHIFT >= N) } + + // SAFETY: We shift the value right by `SHIFT`, reducing the number of bits needed to + // represent the shifted value by as much, and just asserted that `RES >= N - SHIFT`. + unsafe { Bounded::__new(self.0 >> SHIFT) } + } + + /// Left-shifts `self` by `SHIFT` and returns the result as a `Bounded<_, RES>`, where `RES >= + /// N + SHIFT`. 
+ /// + /// # Examples + /// + /// ``` + /// use kernel::num::Bounded; + /// + /// let v = Bounded::::new::<0xff>(); + /// let v_shifted: Bounded:: = v.shl::<8, _>(); + /// + /// assert_eq!(v_shifted.get(), 0xff00); + /// ``` + pub fn shl(self) -> Bounded { + const { assert!(RES >= N + SHIFT) } + + // SAFETY: We shift the value left by `SHIFT`, augmenting the number of bits needed to + // represent the shifted value by as much, and just asserted that `RES >= N + SHIFT`. + unsafe { Bounded::__new(self.0 << SHIFT) } + } } impl Deref for Bounded From 164f8634bfd8eef7b90c429156c59706635cfb88 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:13 +0900 Subject: [PATCH 30/67] rust: num: add `into_bool` method to `Bounded` Single-bit numbers are typically treated as booleans. There is an `Into<bool>` implementation for those, but invoking it from contexts that lack type expectations is not always convenient. Add an `into_bool` method as a simpler shortcut. Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Reviewed-by: Daniel Almeida Reviewed-by: Yury Norov Tested-by: Dirk Behme Acked-by: Miguel Ojeda Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-3-86805b2f7e9d@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/num/bounded.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index 2f5f13ecd3d6..d28d118abd8e 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -1101,3 +1101,24 @@ fn from(value: bool) -> Self { unsafe { Self::__new(T::from(value)) } } } + +impl Bounded +where + T: Integer + Zeroable, +{ + /// Converts this [`Bounded`] into a [`bool`]. + /// + /// This is a shorter way of writing `bool::from(self)`.
+ /// + /// # Examples + /// + /// ``` + /// use kernel::num::Bounded; + /// + /// assert_eq!(Bounded::::new::<0>().into_bool(), false); + /// assert_eq!(Bounded::::new::<1>().into_bool(), true); + /// ``` + pub fn into_bool(self) -> bool { + self.into() + } +} From 7836ec76ec5cd8d45759a6a360b1fda4829d2734 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:14 +0900 Subject: [PATCH 31/67] rust: num: make Bounded::get const There is a need to access the inner value of a `Bounded` in const context, notably for bitfields and registers. Remove the invariant check of `Bounded::get`, which allows us to make it const. Reviewed-by: Gary Guo Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-4-86805b2f7e9d@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/num/bounded.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rust/kernel/num/bounded.rs b/rust/kernel/num/bounded.rs index d28d118abd8e..bbab6bbcb315 100644 --- a/rust/kernel/num/bounded.rs +++ b/rust/kernel/num/bounded.rs @@ -379,6 +379,9 @@ pub fn from_expr(expr: T) -> Self { /// Returns the wrapped value as the backing type. /// + /// This is similar to the [`Deref`] implementation, but doesn't enforce the size invariant of + /// the [`Bounded`], which might produce slightly less optimal code. + /// /// # Examples /// /// ``` @@ -387,8 +390,8 @@ pub fn from_expr(expr: T) -> Self { /// let v = Bounded::::new::<7>(); /// assert_eq!(v.get(), 7u32); /// ``` - pub fn get(self) -> T { - *self.deref() + pub const fn get(self) -> T { + self.0 } /// Increases the number of bits usable for `self`. 
From 498823541be1e2d9f947b37a10cc98e681da9828 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:15 +0900 Subject: [PATCH 32/67] rust: io: add IoLoc type and generic I/O accessors I/O accesses are defined by the following properties: - An I/O location, which consists of a start address, a width, and a type to interpret the read value as, - A value, which is returned for reads or provided for writes. Introduce the `IoLoc` trait, which allows implementing types to fully specify an I/O location. This allows I/O operations to be made generic through the new `read` and `write` methods. This design will allow us to factorize the I/O code working with primitives, and to introduce ways to perform I/O with a higher degree of control through register types. Co-developed-by: Gary Guo Signed-off-by: Gary Guo Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-5-86805b2f7e9d@nvidia.com [ Fix incorrect reference to io_addr_assert() in try_update(). - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 124 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index 947eb378d297..e1e9802bb603 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -173,6 +173,30 @@ pub trait IoCapable { unsafe fn io_write(&self, value: T, address: usize); } +/// Describes a given I/O location: its offset, width, and type to convert the raw value from and +/// into. +/// +/// This trait is the key abstraction allowing [`Io::read`], [`Io::write`], and [`Io::update`] (and +/// their fallible [`try_read`](Io::try_read), [`try_write`](Io::try_write) and +/// [`try_update`](Io::try_update) counterparts) to work uniformly with both raw [`usize`] offsets +/// (for primitive types like [`u32`]) and typed ones. 
+/// +/// An `IoLoc` carries three pieces of information: +/// +/// - The offset to access (returned by [`IoLoc::offset`]), +/// - The width of the access (determined by [`IoLoc::IoType`]), +/// - The type `T` in which the raw data is returned or provided. +/// +/// `T` and `IoLoc::IoType` may differ: for instance, a typed register has `T` = the register type +/// with its bitfields, and `IoType` = its backing primitive (e.g. `u32`). +pub trait IoLoc { + /// Size ([`u8`], [`u16`], etc) of the I/O performed on the returned [`offset`](IoLoc::offset). + type IoType: Into + From; + + /// Consumes `self` and returns the offset of this location. + fn offset(self) -> usize; +} + /// Types implementing this trait (e.g. MMIO BARs or PCI config regions) /// can perform I/O operations on regions of memory. /// @@ -406,6 +430,106 @@ fn write64(&self, value: u64, offset: usize) // SAFETY: `address` has been validated by `io_addr_assert`. unsafe { self.io_write(value, address) } } + + /// Generic fallible read with runtime bounds check. + #[inline(always)] + fn try_read(&self, location: L) -> Result + where + L: IoLoc, + Self: IoCapable, + { + let address = self.io_addr::(location.offset())?; + + // SAFETY: `address` has been validated by `io_addr`. + Ok(unsafe { self.io_read(address) }.into()) + } + + /// Generic fallible write with runtime bounds check. + #[inline(always)] + fn try_write(&self, location: L, value: T) -> Result + where + L: IoLoc, + Self: IoCapable, + { + let address = self.io_addr::(location.offset())?; + let io_value = value.into(); + + // SAFETY: `address` has been validated by `io_addr`. + unsafe { self.io_write(io_value, address) } + + Ok(()) + } + + /// Generic fallible update with runtime bounds check. + /// + /// Note: this does not perform any synchronization. The caller is responsible for ensuring + /// exclusive access if required. 
+ #[inline(always)] + fn try_update(&self, location: L, f: F) -> Result + where + L: IoLoc, + Self: IoCapable, + F: FnOnce(T) -> T, + { + let address = self.io_addr::(location.offset())?; + + // SAFETY: `address` has been validated by `io_addr`. + let value: T = unsafe { self.io_read(address) }.into(); + let io_value = f(value).into(); + + // SAFETY: `address` has been validated by `io_addr`. + unsafe { self.io_write(io_value, address) } + + Ok(()) + } + + /// Generic infallible read with compile-time bounds check. + #[inline(always)] + fn read(&self, location: L) -> T + where + L: IoLoc, + Self: IoKnownSize + IoCapable, + { + let address = self.io_addr_assert::(location.offset()); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_read(address) }.into() + } + + /// Generic infallible write with compile-time bounds check. + #[inline(always)] + fn write(&self, location: L, value: T) + where + L: IoLoc, + Self: IoKnownSize + IoCapable, + { + let address = self.io_addr_assert::(location.offset()); + let io_value = value.into(); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(io_value, address) } + } + + /// Generic infallible update with compile-time bounds check. + /// + /// Note: this does not perform any synchronization. The caller is responsible for ensuring + /// exclusive access if required. + #[inline(always)] + fn update(&self, location: L, f: F) + where + L: IoLoc, + Self: IoKnownSize + IoCapable + Sized, + F: FnOnce(T) -> T, + { + let address = self.io_addr_assert::(location.offset()); + + // SAFETY: `address` has been validated by `io_addr_assert`. + let value: T = unsafe { self.io_read(address) }.into(); + let io_value = f(value).into(); + + // SAFETY: `address` has been validated by `io_addr_assert`. + unsafe { self.io_write(io_value, address) } + } } /// Trait for types with a known size at compile time. 
From 147b41ba23d63f43ed34e6940417a5506e323370 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:16 +0900 Subject: [PATCH 33/67] rust: io: use generic read/write accessors for primitive accesses By providing the required `IoLoc` implementations on `usize`, we can leverage the generic accessors and reduce the number of unsafe blocks in the module. This also allows us to directly call the generic `read/write/update` methods with primitive types, so add examples illustrating this. Signed-off-by: Alexandre Courbot Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260314-register-v9-6-86805b2f7e9d@nvidia.com [ Slightly improve wording in doc-comment. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 218 +++++++++++++++++++++++++++++++--------------- 1 file changed, 150 insertions(+), 68 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index e1e9802bb603..c51a87b9169b 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -197,6 +197,26 @@ pub trait IoLoc { fn offset(self) -> usize; } +/// Implements [`IoLoc<$ty>`] for [`usize`], allowing [`usize`] to be used as a parameter of +/// [`Io::read`] and [`Io::write`]. +macro_rules! impl_usize_ioloc { + ($($ty:ty),*) => { + $( + impl IoLoc<$ty> for usize { + type IoType = $ty; + + #[inline(always)] + fn offset(self) -> usize { + self + } + } + )* + } +} + +// Provide the ability to read or write any primitive type using a [`usize`] offset. +impl_usize_ioloc!(u8, u16, u32, u64); + /// Types implementing this trait (e.g. MMIO BARs or PCI config regions) /// can perform I/O operations on regions of memory. /// @@ -241,10 +261,7 @@ fn try_read8(&self, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - Ok(unsafe { self.io_read(address) }) + self.try_read(offset) } /// Fallible 16-bit read with runtime bounds check.
@@ -253,10 +270,7 @@ fn try_read16(&self, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - Ok(unsafe { self.io_read(address) }) + self.try_read(offset) } /// Fallible 32-bit read with runtime bounds check. @@ -265,10 +279,7 @@ fn try_read32(&self, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - Ok(unsafe { self.io_read(address) }) + self.try_read(offset) } /// Fallible 64-bit read with runtime bounds check. @@ -277,10 +288,7 @@ fn try_read64(&self, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - Ok(unsafe { self.io_read(address) }) + self.try_read(offset) } /// Fallible 8-bit write with runtime bounds check. @@ -289,11 +297,7 @@ fn try_write8(&self, value: u8, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - unsafe { self.io_write(value, address) }; - Ok(()) + self.try_write(offset, value) } /// Fallible 16-bit write with runtime bounds check. @@ -302,11 +306,7 @@ fn try_write16(&self, value: u16, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - unsafe { self.io_write(value, address) }; - Ok(()) + self.try_write(offset, value) } /// Fallible 32-bit write with runtime bounds check. @@ -315,11 +315,7 @@ fn try_write32(&self, value: u32, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - unsafe { self.io_write(value, address) }; - Ok(()) + self.try_write(offset, value) } /// Fallible 64-bit write with runtime bounds check. 
@@ -328,11 +324,7 @@ fn try_write64(&self, value: u64, offset: usize) -> Result where Self: IoCapable, { - let address = self.io_addr::(offset)?; - - // SAFETY: `address` has been validated by `io_addr`. - unsafe { self.io_write(value, address) }; - Ok(()) + self.try_write(offset, value) } /// Infallible 8-bit read with compile-time bounds check. @@ -341,10 +333,7 @@ fn read8(&self, offset: usize) -> u8 where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. - unsafe { self.io_read(address) } + self.read(offset) } /// Infallible 16-bit read with compile-time bounds check. @@ -353,10 +342,7 @@ fn read16(&self, offset: usize) -> u16 where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. - unsafe { self.io_read(address) } + self.read(offset) } /// Infallible 32-bit read with compile-time bounds check. @@ -365,10 +351,7 @@ fn read32(&self, offset: usize) -> u32 where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. - unsafe { self.io_read(address) } + self.read(offset) } /// Infallible 64-bit read with compile-time bounds check. @@ -377,10 +360,7 @@ fn read64(&self, offset: usize) -> u64 where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. - unsafe { self.io_read(address) } + self.read(offset) } /// Infallible 8-bit write with compile-time bounds check. @@ -389,10 +369,7 @@ fn write8(&self, value: u8, offset: usize) where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. 
- unsafe { self.io_write(value, address) } + self.write(offset, value) } /// Infallible 16-bit write with compile-time bounds check. @@ -401,10 +378,7 @@ fn write16(&self, value: u16, offset: usize) where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. - unsafe { self.io_write(value, address) } + self.write(offset, value) } /// Infallible 32-bit write with compile-time bounds check. @@ -413,10 +387,7 @@ fn write32(&self, value: u32, offset: usize) where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. - unsafe { self.io_write(value, address) } + self.write(offset, value) } /// Infallible 64-bit write with compile-time bounds check. @@ -425,13 +396,31 @@ fn write64(&self, value: u64, offset: usize) where Self: IoKnownSize + IoCapable, { - let address = self.io_addr_assert::(offset); - - // SAFETY: `address` has been validated by `io_addr_assert`. - unsafe { self.io_write(value, address) } + self.write(offset, value) } /// Generic fallible read with runtime bounds check. + /// + /// # Examples + /// + /// Read a primitive type from an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_reads(io: &Mmio) -> Result { + /// // 32-bit read from address `0x10`. + /// let v: u32 = io.try_read(0x10)?; + /// + /// // 8-bit read from address `0xfff`. + /// let v: u8 = io.try_read(0xfff)?; + /// + /// Ok(()) + /// } + /// ``` #[inline(always)] fn try_read(&self, location: L) -> Result where @@ -445,6 +434,27 @@ fn try_read(&self, location: L) -> Result } /// Generic fallible write with runtime bounds check. 
+ /// + /// # Examples + /// + /// Write a primitive type to an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_writes(io: &Mmio) -> Result { + /// // 32-bit write of value `1` at address `0x10`. + /// io.try_write(0x10, 1u32)?; + /// + /// // 8-bit write of value `0xff` at address `0xfff`. + /// io.try_write(0xfff, 0xffu8)?; + /// + /// Ok(()) + /// } + /// ``` #[inline(always)] fn try_write(&self, location: L, value: T) -> Result where @@ -464,6 +474,23 @@ fn try_write(&self, location: L, value: T) -> Result /// /// Note: this does not perform any synchronization. The caller is responsible for ensuring /// exclusive access if required. + /// + /// # Examples + /// + /// Read the u32 value at address `0x10`, increment it, and store the updated value back: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_update(io: &Mmio<0x1000>) -> Result { + /// io.try_update(0x10, |v: u32| { + /// v + 1 + /// }) + /// } + /// ``` #[inline(always)] fn try_update(&self, location: L, f: F) -> Result where @@ -484,6 +511,25 @@ fn try_update(&self, location: L, f: F) -> Result } /// Generic infallible read with compile-time bounds check. + /// + /// # Examples + /// + /// Read a primitive type from an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_reads(io: &Mmio<0x1000>) { + /// // 32-bit read from address `0x10`. + /// let v: u32 = io.read(0x10); + /// + /// // 8-bit read from the top of the I/O space. + /// let v: u8 = io.read(0xfff); + /// } + /// ``` #[inline(always)] fn read(&self, location: L) -> T where @@ -497,6 +543,25 @@ fn read(&self, location: L) -> T } /// Generic infallible write with compile-time bounds check. 
+ /// + /// # Examples + /// + /// Write a primitive type to an I/O address: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_writes(io: &Mmio<0x1000>) { + /// // 32-bit write of value `1` at address `0x10`. + /// io.write(0x10, 1u32); + /// + /// // 8-bit write of value `0xff` at the top of the I/O space. + /// io.write(0xfff, 0xffu8); + /// } + /// ``` #[inline(always)] fn write(&self, location: L, value: T) where @@ -514,6 +579,23 @@ fn write(&self, location: L, value: T) /// /// Note: this does not perform any synchronization. The caller is responsible for ensuring /// exclusive access if required. + /// + /// # Examples + /// + /// Read the u32 value at address `0x10`, increment it, and store the updated value back: + /// + /// ```no_run + /// use kernel::io::{ + /// Io, + /// Mmio, + /// }; + /// + /// fn do_update(io: &Mmio<0x1000>) { + /// io.update(0x10, |v: u32| { + /// v + 1 + /// }) + /// } + /// ``` #[inline(always)] fn update(&self, location: L, f: F) where From 20ba6a1dbcb957152f6d858015b3a3311dd6da49 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:17 +0900 Subject: [PATCH 34/67] rust: io: add `register!` macro Add a macro for defining hardware register types with I/O accessors. Each register field is represented as a `Bounded` of the appropriate bit width, ensuring field values are never silently truncated. Fields can optionally be converted to/from custom types, either fallibly or infallibly. The address of registers can be direct, relative, or indexed, supporting most of the patterns in which registers are arranged. 
Suggested-by: Danilo Krummrich Link: https://lore.kernel.org/all/20250306222336.23482-6-dakr@kernel.org/ Co-developed-by: Gary Guo Signed-off-by: Gary Guo Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-7-86805b2f7e9d@nvidia.com [ * Improve wording and formatting of doc-comments, * Import build_assert!(), * Add missing inline annotations, * Call static_assert!() with absolute path, * Use expect instead of allow. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 5 +- rust/kernel/io/register.rs | 1229 ++++++++++++++++++++++++++++++++++++ 2 files changed, 1233 insertions(+), 1 deletion(-) create mode 100644 rust/kernel/io/register.rs diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index c51a87b9169b..4950cecf30ca 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -11,8 +11,10 @@ pub mod mem; pub mod poll; +pub mod register; pub mod resource; +pub use crate::register; pub use resource::Resource; /// Physical address type. @@ -179,7 +181,8 @@ pub trait IoCapable { /// This trait is the key abstraction allowing [`Io::read`], [`Io::write`], and [`Io::update`] (and /// their fallible [`try_read`](Io::try_read), [`try_write`](Io::try_write) and /// [`try_update`](Io::try_update) counterparts) to work uniformly with both raw [`usize`] offsets -/// (for primitive types like [`u32`]) and typed ones. +/// (for primitive types like [`u32`]) and typed ones (like those generated by the [`register!`] +/// macro). /// /// An `IoLoc` carries three pieces of information: /// diff --git a/rust/kernel/io/register.rs b/rust/kernel/io/register.rs new file mode 100644 index 000000000000..dbd458aaa761 --- /dev/null +++ b/rust/kernel/io/register.rs @@ -0,0 +1,1229 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Macro to define register layout and accessors. +//! +//! The [`register!`](kernel::io::register!) macro provides an intuitive and readable syntax for +//! 
defining a dedicated type for each register and accessing it using [`Io`](super::Io). Each such +//! type comes with its own field accessors that can return an error if a field's value is invalid. +//! +//! Note: most of the items in this module are public so they can be referenced by the macro, but +//! most are not to be used directly by users. Outside of the `register!` macro itself, the only +//! items you might want to import from this module are [`WithBase`] and [`Array`]. +//! +//! # Simple example +//! +//! ```no_run +//! use kernel::io::register; +//! +//! register! { +//! /// Basic information about the chip. +//! pub BOOT_0(u32) @ 0x00000100 { +//! /// Vendor ID. +//! 15:8 vendor_id; +//! /// Major revision of the chip. +//! 7:4 major_revision; +//! /// Minor revision of the chip. +//! 3:0 minor_revision; +//! } +//! } +//! ``` +//! +//! This defines a 32-bit `BOOT_0` type which can be read from or written to offset `0x100` of an +//! `Io` region, with the described bitfields. For instance, `minor_revision` consists of the 4 +//! least significant bits of the type. +//! +//! Fields are instances of [`Bounded`](kernel::num::Bounded) and can be read by calling their +//! getter method, which is named after them. They also have setter methods prefixed with `with_` +//! for runtime values and `with_const_` for constant values. All setters return the updated +//! register value. +//! +//! Fields can also be transparently converted from/to an arbitrary type by using the `=>` and +//! `?=>` syntaxes. +//! +//! If present, doc comments above register or field definitions are added to the relevant item +//! they document (the register type itself, or the field's setter and getter methods). +//! +//! Note that multiple registers can be defined in a single `register!` invocation. This can be +//! useful to group related registers together. +//! +//! Here is how the register defined above can be used in code: +//! +//! +//! ```no_run +//! use kernel::{ +//! 
io::{ +//! register, +//! Io, +//! IoLoc, +//! }, +//! num::Bounded, +//! }; +//! # use kernel::io::Mmio; +//! # register! { +//! # pub BOOT_0(u32) @ 0x00000100 { +//! # 15:8 vendor_id; +//! # 7:4 major_revision; +//! # 3:0 minor_revision; +//! # } +//! # } +//! # fn test(io: &Mmio<0x1000>) { +//! # fn obtain_vendor_id() -> u8 { 0xff } +//! +//! // Read from the register's defined offset (0x100). +//! let boot0 = io.read(BOOT_0); +//! pr_info!("chip revision: {}.{}", boot0.major_revision().get(), boot0.minor_revision().get()); +//! +//! // Update some fields and write the new value back. +//! let new_boot0 = boot0 +//! // Constant values. +//! .with_const_major_revision::<3>() +//! .with_const_minor_revision::<10>() +//! // Runtime value. +//! .with_vendor_id(obtain_vendor_id()); +//! io.write((), new_boot0); +//! +//! // Or, build a new value from zero and write it: +//! io.write((), BOOT_0::zeroed() +//! .with_const_major_revision::<3>() +//! .with_const_minor_revision::<10>() +//! .with_vendor_id(obtain_vendor_id()) +//! ); +//! +//! // Or, read and update the register in a single step. +//! io.update(BOOT_0, |r| r +//! .with_const_major_revision::<3>() +//! .with_const_minor_revision::<10>() +//! .with_vendor_id(obtain_vendor_id()) +//! ); +//! +//! // Constant values can also be built using the const setters. +//! const V: BOOT_0 = pin_init::zeroed::() +//! .with_const_major_revision::<3>() +//! .with_const_minor_revision::<10>(); +//! # } +//! ``` +//! +//! For more extensive documentation about how to define registers, see the +//! [`register!`](kernel::io::register!) macro. + +use core::marker::PhantomData; + +use crate::io::IoLoc; + +use kernel::build_assert; + +/// Trait implemented by all registers. +pub trait Register: Sized { + /// Backing primitive type of the register. + type Storage: Into + From; + + /// Start offset of the register. + /// + /// The interpretation of this offset depends on the type of the register. 
+ const OFFSET: usize; +} + +/// Trait implemented by registers with a fixed offset. +pub trait FixedRegister: Register {} + +/// Allows `()` to be used as the `location` parameter of [`Io::write`](super::Io::write) when +/// passing a [`FixedRegister`] value. +impl IoLoc for () +where + T: FixedRegister, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + } +} + +/// A [`FixedRegister`] carries its location in its type. Thus `FixedRegister` values can be used +/// as an [`IoLoc`]. +impl IoLoc for T +where + T: FixedRegister, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + } +} + +/// Location of a fixed register. +pub struct FixedRegisterLoc(PhantomData); + +impl FixedRegisterLoc { + /// Returns the location of `T`. + #[inline(always)] + // We do not implement `Default` so we can be const. + #[expect(clippy::new_without_default)] + pub const fn new() -> Self { + Self(PhantomData) + } +} + +impl IoLoc for FixedRegisterLoc +where + T: FixedRegister, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + } +} + +/// Trait providing a base address to be added to the offset of a relative register to obtain +/// its actual offset. +/// +/// The `T` generic argument is used to distinguish which base to use, in case a type provides +/// several bases. It is given to the `register!` macro to restrict the use of the register to +/// implementors of this particular variant. +pub trait RegisterBase { + /// Base address to which register offsets are added. + const BASE: usize; +} + +/// Trait implemented by all registers that are relative to a base. +pub trait WithBase { + /// Family of bases applicable to this register. + type BaseFamily; + + /// Returns the absolute location of this type when using `B` as its base. 
+ #[inline(always)] + fn of>() -> RelativeRegisterLoc + where + Self: Register, + { + RelativeRegisterLoc::new() + } +} + +/// Trait implemented by relative registers. +pub trait RelativeRegister: Register + WithBase {} + +/// Location of a relative register. +/// +/// This can either be an immediately accessible regular [`RelativeRegister`], or a +/// [`RelativeRegisterArray`] that needs one additional resolution through +/// [`RelativeRegisterLoc::at`]. +pub struct RelativeRegisterLoc(PhantomData, PhantomData); + +impl RelativeRegisterLoc +where + T: Register + WithBase, + B: RegisterBase + ?Sized, +{ + /// Returns the location of a relative register or register array. + #[inline(always)] + // We do not implement `Default` so we can be const. + #[expect(clippy::new_without_default)] + pub const fn new() -> Self { + Self(PhantomData, PhantomData) + } + + // Returns the absolute offset of the relative register using base `B`. + // + // This is implemented as a private const method so it can be reused by the [`IoLoc`] + // implementations of both [`RelativeRegisterLoc`] and [`RelativeRegisterArrayLoc`]. + #[inline] + const fn offset(self) -> usize { + B::BASE + T::OFFSET + } +} + +impl IoLoc for RelativeRegisterLoc +where + T: RelativeRegister, + B: RegisterBase + ?Sized, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + RelativeRegisterLoc::offset(self) + } +} + +/// Trait implemented by arrays of registers. +pub trait RegisterArray: Register { + /// Number of elements in the registers array. + const SIZE: usize; + /// Number of bytes between the start of elements in the registers array. + const STRIDE: usize; +} + +/// Location of an array register. +pub struct RegisterArrayLoc(usize, PhantomData); + +impl RegisterArrayLoc { + /// Returns the location of register `T` at position `idx`, with build-time validation. 
+ #[inline(always)] + pub fn new(idx: usize) -> Self { + build_assert!(idx < T::SIZE); + + Self(idx, PhantomData) + } + + /// Attempts to return the location of register `T` at position `idx`, with runtime validation. + #[inline(always)] + pub fn try_new(idx: usize) -> Option { + if idx < T::SIZE { + Some(Self(idx, PhantomData)) + } else { + None + } + } +} + +impl IoLoc for RegisterArrayLoc +where + T: RegisterArray, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + T::OFFSET + self.0 * T::STRIDE + } +} + +/// Trait providing location builders for [`RegisterArray`]s. +pub trait Array { + /// Returns the location of the register at position `idx`, with build-time validation. + #[inline(always)] + fn at(idx: usize) -> RegisterArrayLoc + where + Self: RegisterArray, + { + RegisterArrayLoc::new(idx) + } + + /// Returns the location of the register at position `idx`, with runtime validation. + #[inline(always)] + fn try_at(idx: usize) -> Option> + where + Self: RegisterArray, + { + RegisterArrayLoc::try_new(idx) + } +} + +/// Trait implemented by arrays of relative registers. +pub trait RelativeRegisterArray: RegisterArray + WithBase {} + +/// Location of a relative array register. +pub struct RelativeRegisterArrayLoc< + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +>(RelativeRegisterLoc, usize); + +impl RelativeRegisterArrayLoc +where + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +{ + /// Returns the location of register `T` from the base `B` at index `idx`, with build-time + /// validation. + #[inline(always)] + pub fn new(idx: usize) -> Self { + build_assert!(idx < T::SIZE); + + Self(RelativeRegisterLoc::new(), idx) + } + + /// Attempts to return the location of register `T` from the base `B` at index `idx`, with + /// runtime validation. 
+ #[inline(always)] + pub fn try_new(idx: usize) -> Option { + if idx < T::SIZE { + Some(Self(RelativeRegisterLoc::new(), idx)) + } else { + None + } + } +} + +/// Methods exclusive to [`RelativeRegisterLoc`]s created with a [`RelativeRegisterArray`]. +impl RelativeRegisterLoc +where + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +{ + /// Returns the location of the register at position `idx`, with build-time validation. + #[inline(always)] + pub fn at(self, idx: usize) -> RelativeRegisterArrayLoc { + RelativeRegisterArrayLoc::new(idx) + } + + /// Returns the location of the register at position `idx`, with runtime validation. + #[inline(always)] + pub fn try_at(self, idx: usize) -> Option> { + RelativeRegisterArrayLoc::try_new(idx) + } +} + +impl IoLoc for RelativeRegisterArrayLoc +where + T: RelativeRegisterArray, + B: RegisterBase + ?Sized, +{ + type IoType = T::Storage; + + #[inline(always)] + fn offset(self) -> usize { + self.0.offset() + self.1 * T::STRIDE + } +} + +/// Defines a dedicated type for a register, including getter and setter methods for its fields and +/// methods to read and write it from an [`Io`](kernel::io::Io) region. +/// +/// This documentation focuses on how to declare registers. See the [module-level +/// documentation](mod@kernel::io::register) for examples of how to access them. +/// +/// There are 4 possible kinds of registers: fixed offset registers, relative registers, arrays of +/// registers, and relative arrays of registers. +/// +/// ## Fixed offset registers +/// +/// These are the simplest kind of registers. Their location is simply an offset inside the I/O +/// region. For instance: +/// +/// ```ignore +/// register! { +/// pub FIXED_REG(u16) @ 0x80 { +/// ... +/// } +/// } +/// ``` +/// +/// This creates a 16-bit register named `FIXED_REG` located at offset `0x80` of an I/O region. 
+/// +/// These registers' location can be built simply by referencing their name: +/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// +/// register! { +/// FIXED_REG(u32) @ 0x100 { +/// 16:8 high_byte; +/// 7:0 low_byte; +/// } +/// } +/// +/// # fn test(io: &Mmio<0x1000>) { +/// let val = io.read(FIXED_REG); +/// +/// // Write from an already-existing value. +/// io.write(FIXED_REG, val.with_low_byte(0xff)); +/// +/// // Create a register value from scratch. +/// let val2 = FIXED_REG::zeroed().with_high_byte(0x80); +/// +/// // The location of fixed offset registers is already contained in their type. Thus, the +/// // `location` argument of `Io::write` is technically redundant and can be replaced by `()`. +/// io.write((), val2); +/// # } +/// +/// ``` +/// +/// It is possible to create an alias of an existing register with new field definitions by using +/// the `=> ALIAS` syntax. This is useful for cases where a register's interpretation depends on +/// the context: +/// +/// ```no_run +/// use kernel::io::register; +/// +/// register! { +/// /// Scratch register. +/// pub SCRATCH(u32) @ 0x00000200 { +/// 31:0 value; +/// } +/// +/// /// Boot status of the firmware. +/// pub SCRATCH_BOOT_STATUS(u32) => SCRATCH { +/// 0:0 completed; +/// } +/// } +/// ``` +/// +/// In this example, `SCRATCH_BOOT_STATUS` uses the same I/O address as `SCRATCH`, while providing +/// its own `completed` field. +/// +/// ## Relative registers +/// +/// Relative registers can be instantiated several times at a relative offset of a group of bases. +/// For instance, imagine the following I/O space: +/// +/// ```text +/// +-----------------------------+ +/// | ... | +/// | | +/// 0x100--->+------------CPU0-------------+ +/// | | +/// 0x110--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... 
| +/// | | +/// | | +/// 0x200--->+------------CPU1-------------+ +/// | | +/// 0x210--->+-----------------------------+ +/// | CPU_CTL | +/// +-----------------------------+ +/// | ... | +/// +-----------------------------+ +/// ``` +/// +/// `CPU0` and `CPU1` both have a `CPU_CTL` register that starts at offset `0x10` of their I/O +/// space segment. Since both instances of `CPU_CTL` share the same layout, we don't want to define +/// them twice and would prefer a way to select which one to use from a single definition. +/// +/// This can be done using the `Base + Offset` syntax when specifying the register's address: +/// +/// ```ignore +/// register! { +/// pub RELATIVE_REG(u32) @ Base + 0x80 { +/// ... +/// } +/// } +/// ``` +/// +/// This creates a register with an offset of `0x80` from a given base. +/// +/// `Base` is an arbitrary type (typically a ZST) to be used as a generic parameter of the +/// [`RegisterBase`] trait to provide the base as a constant, i.e. each type providing a base for +/// this register needs to implement `RegisterBase`. +/// +/// The location of relative registers can be built using the [`WithBase::of`] method to specify +/// its base. All relative registers implement [`WithBase`]. +/// +/// Here is the above layout translated into code: +/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// register::{ +/// RegisterBase, +/// WithBase, +/// }, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// +/// // Type used to identify the base. +/// pub struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// +/// // This makes `CPU_CTL` accessible from all implementors of `RegisterBase`. +/// register! { +/// /// CPU core control. 
+/// pub CPU_CTL(u32) @ CpuCtlBase + 0x10 { +/// 0:0 start; +/// } +/// } +/// +/// # fn test(io: Mmio<0x1000>) { +/// // Read the status of `Cpu0`. +/// let cpu0_started = io.read(CPU_CTL::of::()); +/// +/// // Stop `Cpu0`. +/// io.write(WithBase::of::(), CPU_CTL::zeroed()); +/// # } +/// +/// // Aliases can also be defined for relative registers. +/// register! { +/// /// Alias to CPU core control. +/// pub CPU_CTL_ALIAS(u32) => CpuCtlBase + CPU_CTL { +/// /// Start the aliased CPU core. +/// 1:1 alias_start; +/// } +/// } +/// +/// # fn test2(io: Mmio<0x1000>) { +/// // Start the aliased `CPU0`, leaving its other fields untouched. +/// io.update(CPU_CTL_ALIAS::of::(), |r| r.with_alias_start(true)); +/// # } +/// ``` +/// +/// ## Arrays of registers +/// +/// Some I/O areas contain consecutive registers that share the same field layout. These areas can +/// be defined as an array of identical registers, allowing them to be accessed by index with +/// compile-time or runtime bounds checking: +/// +/// ```ignore +/// register! { +/// pub REGISTER_ARRAY(u8)[10, stride = 4] @ 0x100 { +/// ... +/// } +/// } +/// ``` +/// +/// This defines `REGISTER_ARRAY`, an array of 10 byte registers starting at offset `0x100`. Each +/// register is separated from its neighbor by 4 bytes. +/// +/// The `stride` parameter is optional; if unspecified, the registers are placed consecutively from +/// each other. +/// +/// A location for a register in a register array is built using the [`Array::at`] trait method. +/// All arrays of registers implement [`Array`]. +/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// register::Array, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// +/// // Array of 64 consecutive registers with the same layout starting at offset `0x80`. +/// register! { +/// /// Scratch registers.
+/// pub SCRATCH(u32)[64] @ 0x00000080 { +/// 31:0 value; +/// } +/// } +/// +/// # fn test(io: &Mmio<0x1000>) +/// # -> Result<(), Error>{ +/// // Read scratch register 0, i.e. I/O address `0x80`. +/// let scratch_0 = io.read(SCRATCH::at(0)).value(); +/// +/// // Write scratch register 15, i.e. I/O address `0x80 + (15 * 4)`. +/// io.write(Array::at(15), SCRATCH::from(0xffeeaabb)); +/// +/// // This is out of bounds and won't build. +/// // let scratch_128 = io.read(SCRATCH::at(128)).value(); +/// +/// // Runtime-obtained array index. +/// let idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let some_scratch = io.read(SCRATCH::try_at(idx).ok_or(EINVAL)?).value(); +/// +/// // Alias to a specific register in an array. +/// // Here `SCRATCH[8]` is used to convey the firmware exit code. +/// register! { +/// /// Firmware exit status code. +/// pub FIRMWARE_STATUS(u32) => SCRATCH[8] { +/// 7:0 status; +/// } +/// } +/// +/// let status = io.read(FIRMWARE_STATUS).status(); +/// +/// // Non-contiguous register arrays can be defined by adding a stride parameter. +/// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register! { +/// /// Scratch registers bank 0. +/// pub SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ 0x000000c0 { +/// 31:0 value; +/// } +/// +/// /// Scratch registers bank 1. +/// pub SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ 0x000000c4 { +/// 31:0 value; +/// } +/// } +/// # Ok(()) +/// # } +/// ``` +/// +/// ## Relative arrays of registers +/// +/// Combining the two features described in the sections above, arrays of registers accessible from +/// a base can also be defined: +/// +/// ```ignore +/// register! { +/// pub RELATIVE_REGISTER_ARRAY(u8)[10, stride = 4] @ Base + 0x100 { +/// ... +/// } +/// } +/// ``` +/// +/// Like relative registers, they implement the [`WithBase`] trait. 
However the return value of +/// [`WithBase::of`] cannot be used directly as a location and must be further specified using the +/// [`at`](RelativeRegisterLoc::at) method. +/// +/// ```no_run +/// use kernel::{ +/// io::{ +/// register, +/// register::{ +/// RegisterBase, +/// WithBase, +/// }, +/// Io, +/// }, +/// }; +/// # use kernel::io::Mmio; +/// # fn get_scratch_idx() -> usize { +/// # 0x15 +/// # } +/// +/// // Type used as parameter of `RegisterBase` to specify the base. +/// pub struct CpuCtlBase; +/// +/// // ZST describing `CPU0`. +/// struct Cpu0; +/// impl RegisterBase for Cpu0 { +/// const BASE: usize = 0x100; +/// } +/// +/// // ZST describing `CPU1`. +/// struct Cpu1; +/// impl RegisterBase for Cpu1 { +/// const BASE: usize = 0x200; +/// } +/// +/// // 64 per-cpu scratch registers, arranged as a contiguous array. +/// register! { +/// /// Per-CPU scratch registers. +/// pub CPU_SCRATCH(u32)[64] @ CpuCtlBase + 0x00000080 { +/// 31:0 value; +/// } +/// } +/// +/// # fn test(io: &Mmio<0x1000>) -> Result<(), Error> { +/// // Read scratch register 0 of CPU0. +/// let scratch = io.read(CPU_SCRATCH::of::().at(0)); +/// +/// // Write the retrieved value into scratch register 15 of CPU1. +/// io.write(WithBase::of::().at(15), scratch); +/// +/// // This won't build. +/// // let cpu0_scratch_128 = io.read(CPU_SCRATCH::of::().at(128)).value(); +/// +/// // Runtime-obtained array index. +/// let scratch_idx = get_scratch_idx(); +/// // Access on a runtime index returns an error if it is out-of-bounds. +/// let cpu0_scratch = io.read( +/// CPU_SCRATCH::of::().try_at(scratch_idx).ok_or(EINVAL)? +/// ).value(); +/// # Ok(()) +/// # } +/// +/// // Alias to `CPU_SCRATCH[8]` used to convey the firmware exit code. +/// register! { +/// /// Per-CPU firmware exit status code. +/// pub CPU_FIRMWARE_STATUS(u32) => CpuCtlBase + CPU_SCRATCH[8] { +/// 7:0 status; +/// } +/// } +/// +/// // Non-contiguous relative register arrays can be defined by adding a stride parameter.
+/// // Here, each of the 16 registers of the array is separated by 8 bytes, meaning that the +/// // registers of the two declarations below are interleaved. +/// register! { +/// /// Scratch registers bank 0. +/// pub CPU_SCRATCH_INTERLEAVED_0(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d00 { +/// 31:0 value; +/// } +/// +/// /// Scratch registers bank 1. +/// pub CPU_SCRATCH_INTERLEAVED_1(u32)[16, stride = 8] @ CpuCtlBase + 0x00000d04 { +/// 31:0 value; +/// } +/// } +/// +/// # fn test2(io: &Mmio<0x1000>) -> Result<(), Error> { +/// let cpu0_status = io.read(CPU_FIRMWARE_STATUS::of::()).status(); +/// # Ok(()) +/// # } +/// ``` +#[macro_export] +macro_rules! register { + // Entry point for the macro, allowing multiple registers to be defined in one call. + // It matches all possible register declaration patterns to dispatch them to corresponding + // `@reg` rule that defines a single register. + ( + $( + $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + $([ $size:expr $(, stride = $stride:expr)? ])? + $(@ $($base:ident +)? $offset:literal)? + $(=> $alias:ident $(+ $alias_offset:ident)? $([$alias_idx:expr])? )? + { $($fields:tt)* } + )* + ) => { + $( + $crate::register!( + @reg $(#[$attr])* $vis $name ($storage) $([$size $(, stride = $stride)?])? + $(@ $($base +)? $offset)? + $(=> $alias $(+ $alias_offset)? $([$alias_idx])? )? + { $($fields)* } + ); + )* + }; + + // All the rules below are private helpers. + + // Creates a register at a fixed offset of the MMIO space. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $offset:literal + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); + }; + + // Creates an alias register of fixed offset register `alias` with its own fields. 
+ ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ + <$alias as $crate::io::register::Register>::OFFSET + ); + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); + }; + + // Creates a register at a relative offset from a base address provider. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) @ $base:ident + $offset:literal + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!(@io_relative $vis $name($storage) @ $base); + }; + + // Creates an alias register of relative offset register `alias` with its own fields. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $base:ident + $alias:ident + { $($fields:tt)* } + ) => { + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ <$alias as $crate::io::register::Register>::OFFSET + ); + $crate::register!(@io_relative $vis $name($storage) @ $base); + }; + + // Creates an array of registers at a fixed offset of the MMIO space. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + [ $size:expr, stride = $stride:expr ] @ $offset:literal { $($fields:tt)* } + ) => { + ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!(@io_array $vis $name($storage) [ $size, stride = $stride ]); + }; + + // Shortcut for contiguous array of registers (stride == size of element). 
+ ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ] @ $offset:literal + { $($fields:tt)* } + ) => { + $crate::register!( + $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ] + @ $offset { $($fields)* } + ); + }; + + // Creates an alias of register `idx` of array of registers `alias` with its own fields. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) => $alias:ident [ $idx:expr ] + { $($fields:tt)* } + ) => { + ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ + <$alias as $crate::io::register::Register>::OFFSET + + $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE + ); + $crate::register!(@io_fixed $(#[$attr])* $vis $name($storage)); + }; + + // Creates an array of registers at a relative offset from a base address provider. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + [ $size:expr, stride = $stride:expr ] + @ $base:ident + $offset:literal { $($fields:tt)* } + ) => { + ::kernel::static_assert!(::core::mem::size_of::<$storage>() <= $stride); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!(@io_base $name($storage) @ $offset); + $crate::register!( + @io_relative_array $vis $name($storage) [ $size, stride = $stride ] @ $base + $offset + ); + }; + + // Shortcut for contiguous array of relative registers (stride == size of element). 
+ ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) [ $size:expr ] + @ $base:ident + $offset:literal { $($fields:tt)* } + ) => { + $crate::register!( + $(#[$attr])* $vis $name($storage) [ $size, stride = ::core::mem::size_of::<$storage>() ] + @ $base + $offset { $($fields)* } + ); + }; + + // Creates an alias of register `idx` of relative array of registers `alias` with its own + // fields. + ( + @reg $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty) + => $base:ident + $alias:ident [ $idx:expr ] { $($fields:tt)* } + ) => { + ::kernel::static_assert!($idx < <$alias as $crate::io::register::RegisterArray>::SIZE); + + $crate::register!(@bitfield $(#[$attr])* $vis struct $name($storage) { $($fields)* }); + $crate::register!( + @io_base $name($storage) @ + <$alias as $crate::io::register::Register>::OFFSET + + $idx * <$alias as $crate::io::register::RegisterArray>::STRIDE + ); + $crate::register!(@io_relative $vis $name($storage) @ $base); + }; + + // Generates the bitfield for the register. + // + // `#[allow(non_camel_case_types)]` is added since register names typically use + // `SCREAMING_CASE`. + ( + @bitfield $(#[$attr:meta])* $vis:vis struct $name:ident($storage:ty) { $($fields:tt)* } + ) => { + $crate::register!(@bitfield_core + #[allow(non_camel_case_types)] + $(#[$attr])* $vis $name $storage + ); + $crate::register!(@bitfield_fields $vis $name $storage { $($fields)* }); + }; + + // Implementations shared by all registers types. + (@io_base $name:ident($storage:ty) @ $offset:expr) => { + impl $crate::io::register::Register for $name { + type Storage = $storage; + + const OFFSET: usize = $offset; + } + }; + + // Implementations of fixed registers. 
+ (@io_fixed $(#[$attr:meta])* $vis:vis $name:ident ($storage:ty)) => { + impl $crate::io::register::FixedRegister for $name {} + + $(#[$attr])* + $vis const $name: $crate::io::register::FixedRegisterLoc<$name> = + $crate::io::register::FixedRegisterLoc::<$name>::new(); + }; + + // Implementations of relative registers. + (@io_relative $vis:vis $name:ident ($storage:ty) @ $base:ident) => { + impl $crate::io::register::WithBase for $name { + type BaseFamily = $base; + } + + impl $crate::io::register::RelativeRegister for $name {} + }; + + // Implementations of register arrays. + (@io_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ]) => { + impl $crate::io::register::Array for $name {} + + impl $crate::io::register::RegisterArray for $name { + const SIZE: usize = $size; + const STRIDE: usize = $stride; + } + }; + + // Implementations of relative array registers. + ( + @io_relative_array $vis:vis $name:ident ($storage:ty) [ $size:expr, stride = $stride:expr ] + @ $base:ident + $offset:literal + ) => { + impl $crate::io::register::WithBase for $name { + type BaseFamily = $base; + } + + impl $crate::io::register::RegisterArray for $name { + const SIZE: usize = $size; + const STRIDE: usize = $stride; + } + + impl $crate::io::register::RelativeRegisterArray for $name {} + }; + + // Defines the wrapper `$name` type and its conversions from/to the storage type. + (@bitfield_core $(#[$attr:meta])* $vis:vis $name:ident $storage:ty) => { + $(#[$attr])* + #[repr(transparent)] + #[derive(Clone, Copy, PartialEq, Eq)] + $vis struct $name { + inner: $storage, + } + + #[allow(dead_code)] + impl $name { + /// Creates a bitfield from a raw value. + #[inline(always)] + $vis const fn from_raw(value: $storage) -> Self { + Self{ inner: value } + } + + /// Turns this bitfield into its raw value. + /// + /// This is similar to the [`From`] implementation, but is shorter to invoke in + /// most cases. 
+ #[inline(always)] + $vis const fn into_raw(self) -> $storage { + self.inner + } + } + + // SAFETY: `$storage` is `Zeroable` and `$name` is transparent. + unsafe impl ::pin_init::Zeroable for $name {} + + impl ::core::convert::From<$name> for $storage { + #[inline(always)] + fn from(val: $name) -> $storage { + val.into_raw() + } + } + + impl ::core::convert::From<$storage> for $name { + #[inline(always)] + fn from(val: $storage) -> $name { + Self::from_raw(val) + } + } + }; + + // Definitions requiring knowledge of individual fields: private and public field accessors, + // and `Debug` implementation. + (@bitfield_fields $vis:vis $name:ident $storage:ty { + $($(#[doc = $doc:expr])* $hi:literal:$lo:literal $field:ident + $(?=> $try_into_type:ty)? + $(=> $into_type:ty)? + ; + )* + } + ) => { + #[allow(dead_code)] + impl $name { + $( + $crate::register!(@private_field_accessors $vis $name $storage : $hi:$lo $field); + $crate::register!( + @public_field_accessors $(#[doc = $doc])* $vis $name $storage : $hi:$lo $field + $(?=> $try_into_type)? + $(=> $into_type)? + ); + )* + } + + $crate::register!(@debug $name { $($field;)* }); + }; + + // Private field accessors working with the exact `Bounded` type for the field. + ( + @private_field_accessors $vis:vis $name:ident $storage:ty : $hi:tt:$lo:tt $field:ident + ) => { + ::kernel::macros::paste!( + $vis const [<$field:upper _RANGE>]: ::core::ops::RangeInclusive = $lo..=$hi; + $vis const [<$field:upper _MASK>]: $storage = + ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); + $vis const [<$field:upper _SHIFT>]: u32 = $lo; + ); + + ::kernel::macros::paste!( + fn [<__ $field>](self) -> + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> { + // Left shift to align the field's MSB with the storage MSB. + const ALIGN_TOP: u32 = $storage::BITS - ($hi + 1); + // Right shift to move the top-aligned field to bit 0 of the storage. + const ALIGN_BOTTOM: u32 = ALIGN_TOP + $lo; + + // Extract the field using two shifts. 
`Bounded::shr` produces the correctly-sized + // output type. + let val = ::kernel::num::Bounded::<$storage, { $storage::BITS }>::from( + self.inner << ALIGN_TOP + ); + val.shr::() + } + + const fn [<__with_ $field>]( + mut self, + value: ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>, + ) -> Self + { + const MASK: $storage = <$name>::[<$field:upper _MASK>]; + const SHIFT: u32 = <$name>::[<$field:upper _SHIFT>]; + + let value = value.get() << SHIFT; + self.inner = (self.inner & !MASK) | value; + + self + } + ); + }; + + // Public accessors for fields infallibly (`=>`) converted to a type. + ( + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : + $hi:literal:$lo:literal $field:ident => $into_type:ty + ) => { + ::kernel::macros::paste!( + + $(#[doc = $doc])* + #[doc = "Returns the value of this field."] + #[inline(always)] + $vis fn $field(self) -> $into_type + { + self.[<__ $field>]().into() + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the given `value`."] + #[inline(always)] + $vis fn [](self, value: $into_type) -> Self + { + self.[<__with_ $field>](value.into()) + } + + ); + }; + + // Public accessors for fields fallibly (`?=>`) converted to a type. + ( + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : + $hi:tt:$lo:tt $field:ident ?=> $try_into_type:ty + ) => { + ::kernel::macros::paste!( + + $(#[doc = $doc])* + #[doc = "Returns the value of this field."] + #[inline(always)] + $vis fn $field(self) -> + Result< + $try_into_type, + <$try_into_type as ::core::convert::TryFrom< + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> + >>::Error + > + { + self.[<__ $field>]().try_into() + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the given `value`."] + #[inline(always)] + $vis fn [](self, value: $try_into_type) -> Self + { + self.[<__with_ $field>](value.into()) + } + + ); + }; + + // Public accessors for fields not converted to a type. 
+ ( + @public_field_accessors $(#[doc = $doc:expr])* $vis:vis $name:ident $storage:ty : + $hi:tt:$lo:tt $field:ident + ) => { + ::kernel::macros::paste!( + + $(#[doc = $doc])* + #[doc = "Returns the value of this field."] + #[inline(always)] + $vis fn $field(self) -> + ::kernel::num::Bounded<$storage, { $hi + 1 - $lo }> + { + self.[<__ $field>]() + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the compile-time constant `VALUE`."] + #[inline(always)] + $vis const fn [](self) -> Self { + self.[<__with_ $field>]( + ::kernel::num::Bounded::<$storage, { $hi + 1 - $lo }>::new::() + ) + } + + $(#[doc = $doc])* + #[doc = "Sets this field to the given `value`."] + #[inline(always)] + $vis fn []( + self, + value: T, + ) -> Self + where T: Into<::kernel::num::Bounded<$storage, { $hi + 1 - $lo }>>, + { + self.[<__with_ $field>](value.into()) + } + + $(#[doc = $doc])* + #[doc = "Tries to set this field to `value`, returning an error if it is out of range."] + #[inline(always)] + $vis fn []( + self, + value: T, + ) -> ::kernel::error::Result + where T: ::kernel::num::TryIntoBounded<$storage, { $hi + 1 - $lo }>, + { + Ok( + self.[<__with_ $field>]( + value.try_into_bounded().ok_or(::kernel::error::code::EOVERFLOW)? + ) + ) + } + + ); + }; + + // `Debug` implementation. + (@debug $name:ident { $($field:ident;)* }) => { + impl ::kernel::fmt::Debug for $name { + fn fmt(&self, f: &mut ::kernel::fmt::Formatter<'_>) -> ::kernel::fmt::Result { + f.debug_struct(stringify!($name)) + .field("", &::kernel::prelude::fmt!("{:#x}", self.inner)) + $( + .field(stringify!($field), &self.$field()) + )* + .finish() + } + } + }; +} From 9a52a8f5ed97d47c9641248874f4c6a78e136d97 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:18 +0900 Subject: [PATCH 35/67] rust: io: introduce `write_reg` and `LocatedRegister` Some I/O types, like fixed address registers, carry their location alongside their values. 
For these types, the regular `Io::write` method can lead to repeating the location information twice: once to provide the location itself, another time to build the value. We are also considering adding support for making all register values carry their full location information for convenience and safety. Add a new `Io::write_reg` method that takes a single argument implementing `LocatedRegister`, a trait that decomposes implementors into a `(location, value)` tuple. This allows write operations on fixed offset registers to be done while specifying their name only once. Suggested-by: Danilo Krummrich Link: https://lore.kernel.org/all/DH0XBLXZD81K.22SWIZ1ZAOW1@kernel.org/ Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-8-86805b2f7e9d@nvidia.com [ Replace FIFO with VERSION register in the examples. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/io.rs | 87 ++++++++++++++++++++++++++++++++++++++ rust/kernel/io/register.rs | 35 ++++++++++++++- 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs index 4950cecf30ca..fcc7678fd9e3 100644 --- a/rust/kernel/io.rs +++ b/rust/kernel/io.rs @@ -17,6 +17,8 @@ pub use crate::register; pub use resource::Resource; +use register::LocatedRegister; + /// Physical address type. /// /// This is a type alias to either `u32` or `u64` depending on the config option @@ -473,6 +475,49 @@ fn try_write(&self, location: L, value: T) -> Result Ok(()) } + /// Generic fallible write of a fully-located register value. + /// + /// # Examples + /// + /// Tuples carrying a location and a value can be used with this method: + /// + /// ```no_run + /// use kernel::io::{ + /// register, + /// Io, + /// Mmio, + /// }; + /// + /// register!
{ + /// VERSION(u32) @ 0x100 { + /// 15:8 major; + /// 7:0 minor; + /// } + /// } + /// + /// impl VERSION { + /// fn new(major: u8, minor: u8) -> Self { + /// VERSION::zeroed().with_major(major).with_minor(minor) + /// } + /// } + /// + /// fn do_write_reg(io: &Mmio) -> Result { + /// + /// io.try_write_reg(VERSION::new(1, 0)) + /// } + /// ``` + #[inline(always)] + fn try_write_reg(&self, value: V) -> Result + where + L: IoLoc, + V: LocatedRegister, + Self: IoCapable, + { + let (location, value) = value.into_io_op(); + + self.try_write(location, value) + } + /// Generic fallible update with runtime bounds check. /// /// Note: this does not perform any synchronization. The caller is responsible for ensuring @@ -578,6 +623,48 @@ fn write(&self, location: L, value: T) unsafe { self.io_write(io_value, address) } } + /// Generic infallible write of a fully-located register value. + /// + /// # Examples + /// + /// Tuples carrying a location and a value can be used with this method: + /// + /// ```no_run + /// use kernel::io::{ + /// register, + /// Io, + /// Mmio, + /// }; + /// + /// register! { + /// VERSION(u32) @ 0x100 { + /// 15:8 major; + /// 7:0 minor; + /// } + /// } + /// + /// impl VERSION { + /// fn new(major: u8, minor: u8) -> Self { + /// VERSION::zeroed().with_major(major).with_minor(minor) + /// } + /// } + /// + /// fn do_write_reg(io: &Mmio<0x1000>) { + /// io.write_reg(VERSION::new(1, 0)); + /// } + /// ``` + #[inline(always)] + fn write_reg(&self, value: V) + where + L: IoLoc, + V: LocatedRegister, + Self: IoKnownSize + IoCapable, + { + let (location, value) = value.into_io_op(); + + self.write(location, value) + } + /// Generic infallible update with compile-time bounds check. /// /// Note: this does not perform any synchronization. 
The caller is responsible for ensuring diff --git a/rust/kernel/io/register.rs b/rust/kernel/io/register.rs index dbd458aaa761..abc49926abfe 100644 --- a/rust/kernel/io/register.rs +++ b/rust/kernel/io/register.rs @@ -80,10 +80,10 @@ //! .with_const_minor_revision::<10>() //! // Runtime value. //! .with_vendor_id(obtain_vendor_id()); -//! io.write((), new_boot0); +//! io.write_reg(new_boot0); //! //! // Or, build a new value from zero and write it: -//! io.write((), BOOT_0::zeroed() +//! io.write_reg(BOOT_0::zeroed() //! .with_const_major_revision::<3>() //! .with_const_minor_revision::<10>() //! .with_vendor_id(obtain_vendor_id()) @@ -382,6 +382,34 @@ fn offset(self) -> usize { } } +/// Trait implemented by items that contain both a register value and the absolute I/O location at +/// which to write it. +/// +/// Implementors can be used with [`Io::write_reg`](super::Io::write_reg). +pub trait LocatedRegister { + /// Register value to write. + type Value: Register; + /// Full location information at which to write the value. + type Location: IoLoc; + + /// Consumes `self` and returns a `(location, value)` tuple describing a valid I/O write + /// operation. + fn into_io_op(self) -> (Self::Location, Self::Value); +} + +impl LocatedRegister for T +where + T: FixedRegister, +{ + type Location = FixedRegisterLoc; + type Value = T; + + #[inline(always)] + fn into_io_op(self) -> (FixedRegisterLoc, T) { + (FixedRegisterLoc::new(), self) + } +} + /// Defines a dedicated type for a register, including getter and setter methods for its fields and /// methods to read and write it from an [`Io`](kernel::io::Io) region. /// @@ -436,6 +464,9 @@ fn offset(self) -> usize { /// // The location of fixed offset registers is already contained in their type. Thus, the /// // `location` argument of `Io::write` is technically redundant and can be replaced by `()`. /// io.write((), val2); +/// +/// // Or, the single-argument `Io::write_reg` can be used. 
+/// io.write_reg(val2); /// # } /// /// ``` From 79cf41692aadc3d0ac9b1d8e2c2f620ce2103918 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Sat, 14 Mar 2026 10:06:19 +0900 Subject: [PATCH 36/67] sample: rust: pci: use `register!` macro Convert the direct IO accesses to properly defined registers. Tested-by: Zhi Wang Signed-off-by: Alexandre Courbot Link: https://patch.msgid.link/20260314-register-v9-9-86805b2f7e9d@nvidia.com [ Fix up kernel import style. - Danilo ] Signed-off-by: Danilo Krummrich --- samples/rust/rust_driver_pci.rs | 90 +++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/samples/rust/rust_driver_pci.rs b/samples/rust/rust_driver_pci.rs index d3d4a7931deb..47d3e84fab63 100644 --- a/samples/rust/rust_driver_pci.rs +++ b/samples/rust/rust_driver_pci.rs @@ -5,30 +5,63 @@ //! To make this driver probe, QEMU must be run with `-device pci-testdev`. use kernel::{ - device::Bound, - device::Core, + device::{ + Bound, + Core, // + }, devres::Devres, - io::Io, + io::{ + register, + register::Array, + Io, // + }, + num::Bounded, pci, prelude::*, sync::aref::ARef, // }; -struct Regs; +mod regs { + use super::*; -impl Regs { - const TEST: usize = 0x0; - const OFFSET: usize = 0x4; - const DATA: usize = 0x8; - const COUNT: usize = 0xC; - const END: usize = 0x10; + register! 
{ + pub(super) TEST(u8) @ 0x0 { + 7:0 index => TestIndex; + } + + pub(super) OFFSET(u32) @ 0x4 { + 31:0 offset; + } + + pub(super) DATA(u8) @ 0x8 { + 7:0 data; + } + + pub(super) COUNT(u32) @ 0xC { + 31:0 count; + } + } + + pub(super) const END: usize = 0x10; } -type Bar0 = pci::Bar<{ Regs::END }>; +type Bar0 = pci::Bar<{ regs::END }>; #[derive(Copy, Clone, Debug)] struct TestIndex(u8); +impl From> for TestIndex { + fn from(value: Bounded) -> Self { + Self(value.into()) + } +} + +impl From for Bounded { + fn from(value: TestIndex) -> Self { + value.0.into() + } +} + impl TestIndex { const NO_EVENTFD: Self = Self(0); } @@ -54,40 +87,53 @@ struct SampleDriver { impl SampleDriver { fn testdev(index: &TestIndex, bar: &Bar0) -> Result { // Select the test. - bar.write8(index.0, Regs::TEST); + bar.write_reg(regs::TEST::zeroed().with_index(*index)); - let offset = bar.read32(Regs::OFFSET) as usize; - let data = bar.read8(Regs::DATA); + let offset = bar.read(regs::OFFSET).into_raw() as usize; + let data = bar.read(regs::DATA).into(); // Write `data` to `offset` to increase `count` by one. // // Note that we need `try_write8`, since `offset` can't be checked at compile-time. bar.try_write8(data, offset)?; - Ok(bar.read32(Regs::COUNT)) + Ok(bar.read(regs::COUNT).into()) } fn config_space(pdev: &pci::Device) { let config = pdev.config_space(); - // TODO: use the register!() macro for defining PCI configuration space registers once it - // has been move out of nova-core. + // Some PCI configuration space registers. + register! 
{ + VENDOR_ID(u16) @ 0x0 { + 15:0 vendor_id; + } + + REVISION_ID(u8) @ 0x8 { + 7:0 revision_id; + } + + BAR(u32)[6] @ 0x10 { + 31:0 value; + } + } + dev_info!( pdev, "pci-testdev config space read8 rev ID: {:x}\n", - config.read8(0x8) + config.read(REVISION_ID).revision_id() ); dev_info!( pdev, "pci-testdev config space read16 vendor ID: {:x}\n", - config.read16(0) + config.read(VENDOR_ID).vendor_id() ); dev_info!( pdev, "pci-testdev config space read32 BAR 0: {:x}\n", - config.read32(0x10) + config.read(BAR::at(0)).value() ); } } @@ -111,7 +157,7 @@ fn probe(pdev: &pci::Device, info: &Self::IdInfo) -> impl PinInit(0, c"rust_driver_pci"), + bar <- pdev.iomap_region_sized::<{ regs::END }>(0, c"rust_driver_pci"), index: *info, _: { let bar = bar.access(pdev.as_ref())?; @@ -131,7 +177,7 @@ fn probe(pdev: &pci::Device, info: &Self::IdInfo) -> impl PinInit, this: Pin<&Self>) { if let Ok(bar) = this.bar.access(pdev.as_ref()) { // Reset pci-testdev by writing a new test index. - bar.write8(this.index.0, Regs::TEST); + bar.write_reg(regs::TEST::zeroed().with_index(this.index)); } } } From f813ec9e84b4d0ca81ec1da94ab07bfb4a29266c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 3 Feb 2026 00:48:14 +0100 Subject: [PATCH 37/67] devres: fix missing node debug info in devm_krealloc() Fix missing call to set_node_dbginfo() for new devres nodes created by devm_krealloc(). 
Fixes: f82485722e5d ("devres: provide devm_krealloc()") Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260202235210.55176-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/devres.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 171750c1f691..ce519b98a189 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -940,6 +940,8 @@ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) if (!new_dr) return NULL; + set_node_dbginfo(&new_dr->node, "devm_krealloc_release", new_size); + /* * The spinlock protects the linked list against concurrent * modifications but not the resource itself. From 4796dfd7e0e845de5e76b1748d3c022d350f1b57 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 3 Feb 2026 00:48:15 +0100 Subject: [PATCH 38/67] devres: add devres_node_add() Both devres_add() and devres_open_group() acquire the devres_lock and call add_dr(). Add a helper, devres_node_add(), for this pattern. Use guard(spinlock_irqsave) to avoid the explicit unlock call and local flag variables. 
Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260202235210.55176-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/devres.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index ce519b98a189..fd3e9dba8ff2 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -231,6 +231,13 @@ void devres_free(void *res) } EXPORT_SYMBOL_GPL(devres_free); +static void devres_node_add(struct device *dev, struct devres_node *node) +{ + guard(spinlock_irqsave)(&dev->devres_lock); + + add_dr(dev, node); +} + /** * devres_add - Register device resource * @dev: Device to add resource to @@ -243,11 +250,8 @@ EXPORT_SYMBOL_GPL(devres_free); void devres_add(struct device *dev, void *res) { struct devres *dr = container_of(res, struct devres, data); - unsigned long flags; - spin_lock_irqsave(&dev->devres_lock, flags); - add_dr(dev, &dr->node); - spin_unlock_irqrestore(&dev->devres_lock, flags); + devres_node_add(dev, &dr->node); } EXPORT_SYMBOL_GPL(devres_add); @@ -552,7 +556,6 @@ int devres_release_all(struct device *dev) void *devres_open_group(struct device *dev, void *id, gfp_t gfp) { struct devres_group *grp; - unsigned long flags; grp = kmalloc_obj(*grp, gfp); if (unlikely(!grp)) @@ -569,9 +572,7 @@ void *devres_open_group(struct device *dev, void *id, gfp_t gfp) grp->id = id; grp->color = 0; - spin_lock_irqsave(&dev->devres_lock, flags); - add_dr(dev, &grp->node[0]); - spin_unlock_irqrestore(&dev->devres_lock, flags); + devres_node_add(dev, &grp->node[0]); return grp->id; } EXPORT_SYMBOL_GPL(devres_open_group); From 6fe9d3b942d2d18eee35ac9b0b3443d4caabefb6 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 3 Feb 2026 00:48:16 +0100 Subject: [PATCH 39/67] devres: add devres_node_init() Both alloc_dr() and devres_open_group() initialize devres_node.entry and set devres_node.release. Add a helper, devres_node_init(), for this pattern. 
Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260202235210.55176-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/devres.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index fd3e9dba8ff2..bf07a6b16727 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -42,6 +42,12 @@ struct devres_group { /* -- 8 pointers */ }; +static void devres_node_init(struct devres_node *node, dr_release_t release) +{ + INIT_LIST_HEAD(&node->entry); + node->release = release; +} + static void set_node_dbginfo(struct devres_node *node, const char *name, size_t size) { @@ -124,8 +130,7 @@ static __always_inline struct devres *alloc_dr(dr_release_t release, if (!(gfp & __GFP_ZERO)) memset(dr, 0, offsetof(struct devres, data)); - INIT_LIST_HEAD(&dr->node.entry); - dr->node.release = release; + devres_node_init(&dr->node, release); return dr; } @@ -561,10 +566,8 @@ void *devres_open_group(struct device *dev, void *id, gfp_t gfp) if (unlikely(!grp)) return NULL; - grp->node[0].release = &group_open_release; - grp->node[1].release = &group_close_release; - INIT_LIST_HEAD(&grp->node[0].entry); - INIT_LIST_HEAD(&grp->node[1].entry); + devres_node_init(&grp->node[0], &group_open_release); + devres_node_init(&grp->node[1], &group_close_release); set_node_dbginfo(&grp->node[0], "grp<", 0); set_node_dbginfo(&grp->node[1], "grp>", 0); grp->id = grp; From 2b5c6a14b5b4326916ef20b39eea3564ad786e9f Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 3 Feb 2026 00:48:17 +0100 Subject: [PATCH 40/67] devres: don't require ARCH_DMA_MINALIGN for devres actions Currently, devres actions are allocated with devres_alloc(), which allocates a struct devres with a flexible array member for the actual data of the resource. The flexible array member is aligned to ARCH_DMA_MINALIGN, which is wasteful for devres actions that only need to store a struct action_devres. 
Introduce struct devres_action to handle devres actions separately from struct devres, analogous to what we do for struct devres_group. Speaking of which, without this patch struct devres_group is treated as struct devres in release_nodes(). While this is not an actual bug, as release callbacks for devres nodes in struct devres_group are empty functions anyways, it is a bit messy and can be confusing. (Note that besides devres actions, the Rust devres code will also make use of this. The Rust compiler can figure out the correct alignment of T in Devres itself, i.e. no need to force a minimum alignment.) Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260202235210.55176-5-dakr@kernel.org [ Add missing node->release check in devres_for_each_res() and find_dr(); use kzalloc_obj(). - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/base/devres.c | 150 +++++++++++++++++++++++++++++------------- 1 file changed, 105 insertions(+), 45 deletions(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index bf07a6b16727..a12e03b9ab67 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -16,15 +16,19 @@ #include "base.h" #include "trace.h" +struct devres_node; +typedef void (*dr_node_release_t)(struct device *dev, struct devres_node *node); + struct devres_node { struct list_head entry; - dr_release_t release; + dr_node_release_t release; const char *name; size_t size; }; struct devres { struct devres_node node; + dr_release_t release; /* * Some archs want to perform DMA into kmalloc caches * and need a guaranteed alignment larger than @@ -42,7 +46,7 @@ struct devres_group { /* -- 8 pointers */ }; -static void devres_node_init(struct devres_node *node, dr_release_t release) +static void devres_node_init(struct devres_node *node, dr_node_release_t release) { INIT_LIST_HEAD(&node->entry); node->release = release; @@ -81,12 +85,12 @@ static void devres_log(struct device *dev, struct devres_node *node, * Release functions for devres group. 
These callbacks are used only * for identification. */ -static void group_open_release(struct device *dev, void *res) +static void group_open_release(struct device *dev, struct devres_node *node) { /* noop */ } -static void group_close_release(struct device *dev, void *res) +static void group_close_release(struct device *dev, struct devres_node *node) { /* noop */ } @@ -113,6 +117,13 @@ static bool check_dr_size(size_t size, size_t *tot_size) return true; } +static void dr_node_release(struct device *dev, struct devres_node *node) +{ + struct devres *dr = container_of(node, struct devres, node); + + dr->release(dev, dr->data); +} + static __always_inline struct devres *alloc_dr(dr_release_t release, size_t size, gfp_t gfp, int nid) { @@ -130,7 +141,8 @@ static __always_inline struct devres *alloc_dr(dr_release_t release, if (!(gfp & __GFP_ZERO)) memset(dr, 0, offsetof(struct devres, data)); - devres_node_init(&dr->node, release); + devres_node_init(&dr->node, dr_node_release); + dr->release = release; return dr; } @@ -209,7 +221,9 @@ void devres_for_each_res(struct device *dev, dr_release_t release, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); - if (node->release != release) + if (node->release != dr_node_release) + continue; + if (dr->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; @@ -268,7 +282,9 @@ static struct devres *find_dr(struct device *dev, dr_release_t release, list_for_each_entry_reverse(node, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); - if (node->release != release) + if (node->release != dr_node_release) + continue; + if (dr->release != release) continue; if (match && !match(dev, dr->data, match_data)) continue; @@ -330,7 +346,7 @@ void *devres_get(struct device *dev, void *new_res, unsigned long flags; spin_lock_irqsave(&dev->devres_lock, flags); - dr = find_dr(dev, new_dr->node.release, match, match_data); + dr = 
find_dr(dev, new_dr->release, match, match_data); if (!dr) { add_dr(dev, &new_dr->node); dr = new_dr; @@ -504,15 +520,15 @@ static int remove_nodes(struct device *dev, static void release_nodes(struct device *dev, struct list_head *todo) { - struct devres *dr, *tmp; + struct devres_node *node, *tmp; - /* Release. Note that both devres and devres_group are - * handled as devres in the following loop. This is safe. + /* Release. Note that devres, devres_action and devres_group are + * handled as devres_node in the following loop. This is safe. */ - list_for_each_entry_safe_reverse(dr, tmp, todo, node.entry) { - devres_log(dev, &dr->node, "REL"); - dr->node.release(dev, dr->data); - kfree(dr); + list_for_each_entry_safe_reverse(node, tmp, todo, entry) { + devres_log(dev, node, "REL"); + node->release(dev, node); + kfree(node); } } @@ -720,20 +736,22 @@ struct action_devres { void (*action)(void *); }; -static int devm_action_match(struct device *dev, void *res, void *p) -{ - struct action_devres *devres = res; - struct action_devres *target = p; +struct devres_action { + struct devres_node node; + struct action_devres action; +}; - return devres->action == target->action && - devres->data == target->data; +static int devm_action_match(struct devres_action *devres, struct action_devres *target) +{ + return devres->action.action == target->action && + devres->action.data == target->data; } -static void devm_action_release(struct device *dev, void *res) +static void devm_action_release(struct device *dev, struct devres_node *node) { - struct action_devres *devres = res; + struct devres_action *devres = container_of(node, struct devres_action, node); - devres->action(devres->data); + devres->action.action(devres->action.data); } /** @@ -748,32 +766,71 @@ static void devm_action_release(struct device *dev, void *res) */ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name) { - struct action_devres *devres; + struct devres_action 
*devres; - devres = __devres_alloc_node(devm_action_release, sizeof(struct action_devres), - GFP_KERNEL, NUMA_NO_NODE, name); + devres = kzalloc_obj(*devres); if (!devres) return -ENOMEM; - devres->data = data; - devres->action = action; + devres_node_init(&devres->node, devm_action_release); + set_node_dbginfo(&devres->node, name, sizeof(*devres)); - devres_add(dev, devres); + devres->action.data = data; + devres->action.action = action; + + devres_node_add(dev, &devres->node); return 0; } EXPORT_SYMBOL_GPL(__devm_add_action); -bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data) +static struct devres_action *devres_action_find(struct device *dev, + void (*action)(void *), + void *data) { - struct action_devres devres = { + struct devres_node *node; + struct action_devres target = { .data = data, .action = action, }; - return devres_find(dev, devm_action_release, devm_action_match, &devres); + list_for_each_entry_reverse(node, &dev->devres_head, entry) { + struct devres_action *dr = container_of(node, struct devres_action, node); + + if (node->release != devm_action_release) + continue; + if (devm_action_match(dr, &target)) + return dr; + } + + return NULL; +} + +bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data) +{ + guard(spinlock_irqsave)(&dev->devres_lock); + + return !!devres_action_find(dev, action, data); } EXPORT_SYMBOL_GPL(devm_is_action_added); +static struct devres_action *remove_action(struct device *dev, + void (*action)(void *), + void *data) +{ + struct devres_action *dr; + + guard(spinlock_irqsave)(&dev->devres_lock); + + dr = devres_action_find(dev, action, data); + if (!dr) + return ERR_PTR(-ENOENT); + + list_del_init(&dr->node.entry); + devres_log(dev, &dr->node, "REM"); + + return dr; +} + /** * devm_remove_action_nowarn() - removes previously added custom action * @dev: Device that owns the action @@ -798,13 +855,15 @@ int devm_remove_action_nowarn(struct device *dev, void 
(*action)(void *), void *data) { - struct action_devres devres = { - .data = data, - .action = action, - }; + struct devres_action *dr; - return devres_destroy(dev, devm_action_release, devm_action_match, - &devres); + dr = remove_action(dev, action, data); + if (IS_ERR(dr)) + return PTR_ERR(dr); + + kfree(dr); + + return 0; } EXPORT_SYMBOL_GPL(devm_remove_action_nowarn); @@ -820,14 +879,15 @@ EXPORT_SYMBOL_GPL(devm_remove_action_nowarn); */ void devm_release_action(struct device *dev, void (*action)(void *), void *data) { - struct action_devres devres = { - .data = data, - .action = action, - }; + struct devres_action *dr; - WARN_ON(devres_release(dev, devm_action_release, devm_action_match, - &devres)); + dr = remove_action(dev, action, data); + if (WARN_ON(IS_ERR(dr))) + return; + dr->action.action(dr->action.data); + + kfree(dr); } EXPORT_SYMBOL_GPL(devm_release_action); From 55e329d0f7a0a92cf998cc6f20df6e46a4d6ab12 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 3 Feb 2026 00:48:18 +0100 Subject: [PATCH 41/67] devres: add free_node callback to struct devres_node Currently, there are three "subclasses" of struct devres_node, which are struct devres, struct devres_group and struct devres_action. release_nodes(), which only knows about the base struct devres_node, assumes that for all "subclasses" struct devres_node is the first member in the structure and calls kfree() on struct devres_node. While this technically works, we can still improve semantic correctness and type safety with a corresponding free_node() callback. Additionally, we will need this callback soon in the Rust Devres code, to allocate and free the required memory on the Rust side. 
Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260202235210.55176-6-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/devres.c | 57 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index a12e03b9ab67..e6756476db74 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -18,10 +18,12 @@ struct devres_node; typedef void (*dr_node_release_t)(struct device *dev, struct devres_node *node); +typedef void (*dr_node_free_t)(struct devres_node *node); struct devres_node { struct list_head entry; dr_node_release_t release; + dr_node_free_t free_node; const char *name; size_t size; }; @@ -46,10 +48,18 @@ struct devres_group { /* -- 8 pointers */ }; -static void devres_node_init(struct devres_node *node, dr_node_release_t release) +static void devres_node_init(struct devres_node *node, + dr_node_release_t release, + dr_node_free_t free_node) { INIT_LIST_HEAD(&node->entry); node->release = release; + node->free_node = free_node; +} + +static inline void free_node(struct devres_node *node) +{ + node->free_node(node); } static void set_node_dbginfo(struct devres_node *node, const char *name, @@ -124,6 +134,13 @@ static void dr_node_release(struct device *dev, struct devres_node *node) dr->release(dev, dr->data); } +static void dr_node_free(struct devres_node *node) +{ + struct devres *dr = container_of(node, struct devres, node); + + kfree(dr); +} + static __always_inline struct devres *alloc_dr(dr_release_t release, size_t size, gfp_t gfp, int nid) { @@ -141,7 +158,7 @@ static __always_inline struct devres *alloc_dr(dr_release_t release, if (!(gfp & __GFP_ZERO)) memset(dr, 0, offsetof(struct devres, data)); - devres_node_init(&dr->node, dr_node_release); + devres_node_init(&dr->node, dr_node_release, dr_node_free); dr->release = release; return dr; } @@ -233,6 +250,11 @@ void devres_for_each_res(struct device *dev, dr_release_t 
release, } EXPORT_SYMBOL_GPL(devres_for_each_res); +static inline void free_dr(struct devres *dr) +{ + free_node(&dr->node); +} + /** * devres_free - Free device resource data * @res: Pointer to devres data to free @@ -245,7 +267,7 @@ void devres_free(void *res) struct devres *dr = container_of(res, struct devres, data); BUG_ON(!list_empty(&dr->node.entry)); - kfree(dr); + free_dr(dr); } } EXPORT_SYMBOL_GPL(devres_free); @@ -522,13 +544,10 @@ static void release_nodes(struct device *dev, struct list_head *todo) { struct devres_node *node, *tmp; - /* Release. Note that devres, devres_action and devres_group are - * handled as devres_node in the following loop. This is safe. - */ list_for_each_entry_safe_reverse(node, tmp, todo, entry) { devres_log(dev, node, "REL"); node->release(dev, node); - kfree(node); + free_node(node); } } @@ -561,6 +580,13 @@ int devres_release_all(struct device *dev) return cnt; } +static void devres_group_free(struct devres_node *node) +{ + struct devres_group *grp = container_of(node, struct devres_group, node[0]); + + kfree(grp); +} + /** * devres_open_group - Open a new devres group * @dev: Device to open devres group for @@ -582,8 +608,8 @@ void *devres_open_group(struct device *dev, void *id, gfp_t gfp) if (unlikely(!grp)) return NULL; - devres_node_init(&grp->node[0], &group_open_release); - devres_node_init(&grp->node[1], &group_close_release); + devres_node_init(&grp->node[0], &group_open_release, devres_group_free); + devres_node_init(&grp->node[1], &group_close_release, NULL); set_node_dbginfo(&grp->node[0], "grp<", 0); set_node_dbginfo(&grp->node[1], "grp>", 0); grp->id = grp; @@ -754,6 +780,13 @@ static void devm_action_release(struct device *dev, struct devres_node *node) devres->action.action(devres->action.data); } +static void devm_action_free(struct devres_node *node) +{ + struct devres_action *action = container_of(node, struct devres_action, node); + + kfree(action); +} + /** * __devm_add_action() - add a custom action to 
list of managed resources * @dev: Device that owns the action @@ -772,7 +805,7 @@ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, co if (!devres) return -ENOMEM; - devres_node_init(&devres->node, devm_action_release); + devres_node_init(&devres->node, devm_action_release, devm_action_free); set_node_dbginfo(&devres->node, name, sizeof(*devres)); devres->action.data = data; @@ -1015,7 +1048,7 @@ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) old_dr = find_dr(dev, devm_kmalloc_release, devm_kmalloc_match, ptr); if (!old_dr) { spin_unlock_irqrestore(&dev->devres_lock, flags); - kfree(new_dr); + free_dr(new_dr); WARN(1, "Memory chunk not managed or managed by a different device."); return NULL; } @@ -1035,7 +1068,7 @@ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) * list. This is also the reason why we must not use devm_kfree() - the * links are no longer valid. */ - kfree(old_dr); + free_dr(old_dr); return new_dr->data; } From e19f3fed3c4cd523dffa87f3c2d6837aff538543 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 3 Feb 2026 00:48:19 +0100 Subject: [PATCH 42/67] devres: use guard(spinlock_irqsave) where applicable Use guard(spinlock_irqsave)(&dev->devres_lock) where it improves the code. Some places still use manual spin_lock_irqsave() and spin_unlock_irqrestore(), as changing them to use a scoped_guard() would result in unnecessary churn. 
Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260202235210.55176-7-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/devres.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index e6756476db74..dadc185251fb 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -228,12 +228,11 @@ void devres_for_each_res(struct device *dev, dr_release_t release, { struct devres_node *node; struct devres_node *tmp; - unsigned long flags; if (!fn) return; - spin_lock_irqsave(&dev->devres_lock, flags); + guard(spinlock_irqsave)(&dev->devres_lock); list_for_each_entry_safe_reverse(node, tmp, &dev->devres_head, entry) { struct devres *dr = container_of(node, struct devres, node); @@ -246,7 +245,6 @@ void devres_for_each_res(struct device *dev, dr_release_t release, continue; fn(dev, dr->data, data); } - spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_for_each_res); @@ -334,14 +332,12 @@ void *devres_find(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; - unsigned long flags; - spin_lock_irqsave(&dev->devres_lock, flags); + guard(spinlock_irqsave)(&dev->devres_lock); dr = find_dr(dev, release, match, match_data); - spin_unlock_irqrestore(&dev->devres_lock, flags); - if (dr) return dr->data; + return NULL; } EXPORT_SYMBOL_GPL(devres_find); @@ -400,18 +396,15 @@ void *devres_remove(struct device *dev, dr_release_t release, dr_match_t match, void *match_data) { struct devres *dr; - unsigned long flags; - spin_lock_irqsave(&dev->devres_lock, flags); + guard(spinlock_irqsave)(&dev->devres_lock); dr = find_dr(dev, release, match, match_data); if (dr) { list_del_init(&dr->node.entry); devres_log(dev, &dr->node, "REM"); - } - spin_unlock_irqrestore(&dev->devres_lock, flags); - - if (dr) return dr->data; + } + return NULL; } EXPORT_SYMBOL_GPL(devres_remove); @@ -659,17 +652,13 @@ 
static struct devres_group *find_group(struct device *dev, void *id) void devres_close_group(struct device *dev, void *id) { struct devres_group *grp; - unsigned long flags; - - spin_lock_irqsave(&dev->devres_lock, flags); + guard(spinlock_irqsave)(&dev->devres_lock); grp = find_group(dev, id); if (grp) add_dr(dev, &grp->node[1]); else WARN_ON(1); - - spin_unlock_irqrestore(&dev->devres_lock, flags); } EXPORT_SYMBOL_GPL(devres_close_group); From c321a511e37c3aaa16226b3529c30b78f9e380c1 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 3 Feb 2026 00:48:20 +0100 Subject: [PATCH 43/67] devres: remove unnecessary unlocks in devres_release_group() There is no need to call spin_unlock_irqrestore() in every conditional block, as release_nodes() can safely be called with an empty list, in case we hit the "else if" or "else" case. We do not use a scoped_guard() here to not unnecessarily change the indentation level. Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260202235210.55176-8-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/devres.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index dadc185251fb..8fa54a3ed120 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -712,7 +712,6 @@ int devres_release_group(struct device *dev, void *id) int cnt = 0; spin_lock_irqsave(&dev->devres_lock, flags); - grp = find_group(dev, id); if (grp) { struct list_head *first = &grp->node[0].entry; @@ -722,20 +721,18 @@ int devres_release_group(struct device *dev, void *id) end = grp->node[1].entry.next; cnt = remove_nodes(dev, first, end, &todo); - spin_unlock_irqrestore(&dev->devres_lock, flags); - - release_nodes(dev, &todo); } else if (list_empty(&dev->devres_head)) { /* * dev is probably dying via devres_release_all(): groups * have already been removed and are on the process of * being released - don't touch and don't warn.
*/ - spin_unlock_irqrestore(&dev->devres_lock, flags); } else { WARN_ON(1); - spin_unlock_irqrestore(&dev->devres_lock, flags); } + spin_unlock_irqrestore(&dev->devres_lock, flags); + + release_nodes(dev, &todo); return cnt; } From 31b5733bcdef139719c990a86cd98bac07a5597c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 13 Feb 2026 23:07:11 +0100 Subject: [PATCH 44/67] devres: move struct devres_node into base.h Move struct devres_node into base.h, such that we can access it from the Rust devres code. Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260213220718.82835-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/base.h | 12 ++++++++++++ drivers/base/devres.c | 12 ------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 3bc8e6fd06a8..8423148155a3 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -216,6 +216,18 @@ static inline void device_set_driver(struct device *dev, const struct device_dri WRITE_ONCE(dev->driver, (struct device_driver *)drv); } +struct devres_node; +typedef void (*dr_node_release_t)(struct device *dev, struct devres_node *node); +typedef void (*dr_node_free_t)(struct devres_node *node); + +struct devres_node { + struct list_head entry; + dr_node_release_t release; + dr_node_free_t free_node; + const char *name; + size_t size; +}; + void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 8fa54a3ed120..5202af4d5029 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -16,18 +16,6 @@ #include "base.h" #include "trace.h" -struct devres_node; -typedef void (*dr_node_release_t)(struct device *dev, struct devres_node *node); -typedef void (*dr_node_free_t)(struct devres_node *node); - -struct devres_node { - struct list_head entry; - dr_node_release_t release; - 
dr_node_free_t free_node; - const char *name; - size_t size; -}; - struct devres { struct devres_node node; dr_release_t release; From 9738ca7df98f37b647c6a2f5ac5dfe49db03c948 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 13 Feb 2026 23:07:12 +0100 Subject: [PATCH 45/67] devres: export devres_node_init() and devres_node_add() Export devres_node_init() and devres_node_add() through base.h, such that we can access them from the Rust devres code. Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260213220718.82835-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/base.h | 3 +++ drivers/base/devres.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index 8423148155a3..23828bd920db 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -228,6 +228,9 @@ struct devres_node { size_t size; }; +void devres_node_init(struct devres_node *node, dr_node_release_t release, + dr_node_free_t free_node); +void devres_node_add(struct device *dev, struct devres_node *node); void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 5202af4d5029..22d10b30774f 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -36,9 +36,9 @@ struct devres_group { /* -- 8 pointers */ }; -static void devres_node_init(struct devres_node *node, - dr_node_release_t release, - dr_node_free_t free_node) +void devres_node_init(struct devres_node *node, + dr_node_release_t release, + dr_node_free_t free_node) { INIT_LIST_HEAD(&node->entry); node->release = release; @@ -258,7 +258,7 @@ void devres_free(void *res) } EXPORT_SYMBOL_GPL(devres_free); -static void devres_node_add(struct device *dev, struct devres_node *node) +void devres_node_add(struct device *dev, struct devres_node *node) { guard(spinlock_irqsave)(&dev->devres_lock);
From b1081ef74d804ae1c512151c2610b79513a52cd6 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 13 Feb 2026 23:07:13 +0100 Subject: [PATCH 46/67] devres: add devres_node_remove() When the Rust Devres container type is dropped we need a way to remove the embedded struct devres_node from the device's node list. Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260213220718.82835-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/base.h | 1 + drivers/base/devres.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/drivers/base/base.h b/drivers/base/base.h index 23828bd920db..c9812f0a56b0 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -231,6 +231,7 @@ struct devres_node { void devres_node_init(struct devres_node *node, dr_node_release_t release, dr_node_free_t free_node); void devres_node_add(struct device *dev, struct devres_node *node); +bool devres_node_remove(struct device *dev, struct devres_node *node); void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 22d10b30774f..6c7854d4e4a8 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -365,6 +365,22 @@ void *devres_get(struct device *dev, void *new_res, } EXPORT_SYMBOL_GPL(devres_get); +bool devres_node_remove(struct device *dev, struct devres_node *node) +{ + struct devres_node *__node; + + guard(spinlock_irqsave)(&dev->devres_lock); + list_for_each_entry_reverse(__node, &dev->devres_head, entry) { + if (__node == node) { + list_del_init(&node->entry); + devres_log(dev, node, "REM"); + return true; + } + } + + return false; +} + /** * devres_remove - Find a device resource and remove it * @dev: Device to find resource from From ba424bc2c7bb3a9b81d1b6c773f1e2e7b8fffe66 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 13 Feb 2026 23:07:14 +0100 Subject: [PATCH 47/67] 
devres: rename and export set_node_dbginfo() Rename set_node_dbginfo() to devres_set_node_dbginfo() and export it through base.h, such that we can access it from the Rust devres code. Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260213220718.82835-5-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/base/base.h | 2 ++ drivers/base/devres.c | 14 +++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index c9812f0a56b0..afeda89fd471 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -232,6 +232,8 @@ void devres_node_init(struct devres_node *node, dr_node_release_t release, dr_node_free_t free_node); void devres_node_add(struct device *dev, struct devres_node *node); bool devres_node_remove(struct device *dev, struct devres_node *node); +void devres_set_node_dbginfo(struct devres_node *node, const char *name, + size_t size); void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 6c7854d4e4a8..9d9842fc5a19 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -50,7 +50,7 @@ static inline void free_node(struct devres_node *node) node->free_node(node); } -static void set_node_dbginfo(struct devres_node *node, const char *name, +void devres_set_node_dbginfo(struct devres_node *node, const char *name, size_t size) { node->name = name; @@ -189,7 +189,7 @@ void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, int nid, dr = alloc_dr(release, size, gfp | __GFP_ZERO, nid); if (unlikely(!dr)) return NULL; - set_node_dbginfo(&dr->node, name, size); + devres_set_node_dbginfo(&dr->node, name, size); return dr->data; } EXPORT_SYMBOL_GPL(__devres_alloc_node); @@ -607,8 +607,8 @@ void *devres_open_group(struct device *dev, void *id, gfp_t gfp) devres_node_init(&grp->node[0], &group_open_release,
devres_group_free); devres_node_init(&grp->node[1], &group_close_release, NULL); - set_node_dbginfo(&grp->node[0], "grp<", 0); - set_node_dbginfo(&grp->node[1], "grp>", 0); + devres_set_node_dbginfo(&grp->node[0], "grp<", 0); + devres_set_node_dbginfo(&grp->node[1], "grp>", 0); grp->id = grp; if (id) grp->id = id; @@ -796,7 +796,7 @@ int __devm_add_action(struct device *dev, void (*action)(void *), void *data, co return -ENOMEM; devres_node_init(&devres->node, devm_action_release, devm_action_free); - set_node_dbginfo(&devres->node, name, sizeof(*devres)); + devres_set_node_dbginfo(&devres->node, name, sizeof(*devres)); devres->action.data = data; devres->action.action = action; @@ -956,7 +956,7 @@ void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) * This is named devm_kzalloc_release for historical reasons * The initial implementation did not support kmalloc, only kzalloc */ - set_node_dbginfo(&dr->node, "devm_kzalloc_release", size); + devres_set_node_dbginfo(&dr->node, "devm_kzalloc_release", size); devres_add(dev, dr->data); return dr->data; } @@ -1027,7 +1027,7 @@ void *devm_krealloc(struct device *dev, void *ptr, size_t new_size, gfp_t gfp) if (!new_dr) return NULL; - set_node_dbginfo(&new_dr->node, "devm_krealloc_release", new_size); + devres_set_node_dbginfo(&new_dr->node, "devm_krealloc_release", new_size); /* * The spinlock protects the linked list against concurrent From 9aa64d2503c6f5a803ff2990608312e5bdc6b0de Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 13 Feb 2026 23:07:15 +0100 Subject: [PATCH 48/67] rust: devres: embed struct devres_node directly Currently, the Devres container uses devm_add_action() to register a devres callback. devm_add_action() allocates a struct action_devres, which on top of struct devres_node, just keeps a data pointer and release function pointer. 
This is an unnecessary indirection, given that analogous to struct devres, the Devres container can just embed a struct devres_node directly without an additional allocation. In contrast to struct devres, we don't need to force an alignment of ARCH_DMA_MINALIGN (as struct devres does to account for the worst case) since we have generics in Rust. I.e. the compiler already ensures correct alignment of the embedded T in Devres. Thus, get rid of devm_add_action() and instead embed a struct devres_node directly. Reviewed-by: Alice Ryhl Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20260213220718.82835-6-dakr@kernel.org [ * Improve comment about core::any::type_name(), * add #[must_use] to devres_node_remove(), * use container_of!() in devres_node_free_node(). - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/devres.rs | 185 +++++++++++++++++++++++++++++++----------- 1 file changed, 139 insertions(+), 46 deletions(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 6afe196be42c..9e5f93aed20c 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -23,9 +23,22 @@ rcu, Arc, // }, - types::ForeignOwnable, + types::{ + ForeignOwnable, + Opaque, // + }, }; +/// Inner type that embeds a `struct devres_node` and the `Revocable`. +#[repr(C)] +#[pin_data] +struct Inner { + #[pin] + node: Opaque, + #[pin] + data: Revocable, +} + /// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to /// manage their lifetime. /// @@ -111,12 +124,64 @@ /// ``` pub struct Devres { dev: ARef, - /// Pointer to [`Self::devres_callback`]. - /// - /// Has to be stored, since Rust does not guarantee to always return the same address for a - /// function. However, the C API uses the address as a key. 
- callback: unsafe extern "C" fn(*mut c_void), - data: Arc>, + inner: Arc>, +} + +// Calling the FFI functions from the `base` module directly from the `Devres` impl may result in +// them being called directly from driver modules. This happens since the Rust compiler will use +// monomorphisation, so it might happen that functions are instantiated within the calling driver +// module. For now, work around this with `#[inline(never)]` helpers. +// +// TODO: Remove once a more generic solution has been implemented. For instance, we may be able to +// leverage `bindgen` to take care of this depending on whether a symbol is (already) exported. +mod base { + use kernel::{ + bindings, + prelude::*, // + }; + + #[inline(never)] + #[allow(clippy::missing_safety_doc)] + pub(super) unsafe fn devres_node_init( + node: *mut bindings::devres_node, + release: bindings::dr_node_release_t, + free: bindings::dr_node_free_t, + ) { + // SAFETY: Safety requirements are the same as `bindings::devres_node_init`. + unsafe { bindings::devres_node_init(node, release, free) } + } + + #[inline(never)] + #[allow(clippy::missing_safety_doc)] + pub(super) unsafe fn devres_set_node_dbginfo( + node: *mut bindings::devres_node, + name: *const c_char, + size: usize, + ) { + // SAFETY: Safety requirements are the same as `bindings::devres_set_node_dbginfo`. + unsafe { bindings::devres_set_node_dbginfo(node, name, size) } + } + + #[inline(never)] + #[allow(clippy::missing_safety_doc)] + pub(super) unsafe fn devres_node_add( + dev: *mut bindings::device, + node: *mut bindings::devres_node, + ) { + // SAFETY: Safety requirements are the same as `bindings::devres_node_add`. + unsafe { bindings::devres_node_add(dev, node) } + } + + #[must_use] + #[inline(never)] + #[allow(clippy::missing_safety_doc)] + pub(super) unsafe fn devres_node_remove( + dev: *mut bindings::device, + node: *mut bindings::devres_node, + ) -> bool { + // SAFETY: Safety requirements are the same as `bindings::devres_node_remove`. 
+ unsafe { bindings::devres_node_remove(dev, node) } + } } impl Devres { @@ -128,58 +193,86 @@ pub fn new(dev: &Device, data: impl PinInit) -> Result where Error: From, { - let callback = Self::devres_callback; - let data = Arc::pin_init(Revocable::new(data), GFP_KERNEL)?; - let devres_data = data.clone(); + let inner = Arc::pin_init::( + try_pin_init!(Inner { + node <- Opaque::ffi_init(|node: *mut bindings::devres_node| { + // SAFETY: `node` is a valid pointer to an uninitialized `struct devres_node`. + unsafe { + base::devres_node_init( + node, + Some(Self::devres_node_release), + Some(Self::devres_node_free_node), + ) + }; + + // SAFETY: `node` is a valid pointer to an uninitialized `struct devres_node`. + unsafe { + base::devres_set_node_dbginfo( + node, + // TODO: Use `core::any::type_name::()` once it is a `const fn`, + // such that we can convert the `&str` to a `&CStr` at compile-time. + c"Devres".as_char_ptr(), + core::mem::size_of::>(), + ) + }; + }), + data <- Revocable::new(data), + }), + GFP_KERNEL, + )?; // SAFETY: - // - `dev.as_raw()` is a pointer to a valid bound device. - // - `data` is guaranteed to be a valid for the duration of the lifetime of `Self`. - // - `devm_add_action()` is guaranteed not to call `callback` for the entire lifetime of - // `dev`. - to_result(unsafe { - bindings::devm_add_action( - dev.as_raw(), - Some(callback), - Arc::as_ptr(&data).cast_mut().cast(), - ) - })?; + // - `dev` is a valid pointer to a bound `struct device`. + // - `node` is a valid pointer to a `struct devres_node`. + // - `devres_node_add()` is guaranteed not to call `devres_node_release()` for the entire + // lifetime of `dev`. + unsafe { base::devres_node_add(dev.as_raw(), inner.node.get()) }; - // `devm_add_action()` was successful and has consumed the reference count. - core::mem::forget(devres_data); + // Take additional reference count for `devres_node_add()`. 
+ core::mem::forget(inner.clone()); Ok(Self { dev: dev.into(), - callback, - data, + inner, }) } fn data(&self) -> &Revocable { - &self.data + &self.inner.data } #[allow(clippy::missing_safety_doc)] - unsafe extern "C" fn devres_callback(ptr: *mut kernel::ffi::c_void) { - // SAFETY: In `Self::new` we've passed a valid pointer of `Revocable` to - // `devm_add_action()`, hence `ptr` must be a valid pointer to `Revocable`. - let data = unsafe { Arc::from_raw(ptr.cast::>()) }; + unsafe extern "C" fn devres_node_release( + _dev: *mut bindings::device, + node: *mut bindings::devres_node, + ) { + let node = Opaque::cast_from(node); - data.revoke(); + // SAFETY: `node` is in the same allocation as its container. + let inner = unsafe { kernel::container_of!(node, Inner, node) }; + + // SAFETY: `inner` is a valid `Inner` pointer. + let inner = unsafe { &*inner }; + + inner.data.revoke(); } - fn remove_action(&self) -> bool { + #[allow(clippy::missing_safety_doc)] + unsafe extern "C" fn devres_node_free_node(node: *mut bindings::devres_node) { + let node = Opaque::cast_from(node); + + // SAFETY: `node` is in the same allocation as its container. + let inner = unsafe { kernel::container_of!(node, Inner, node) }; + + // SAFETY: `inner` points to the entire `Inner` allocation. + drop(unsafe { Arc::from_raw(inner) }); + } + + fn remove_node(&self) -> bool { // SAFETY: - // - `self.dev` is a valid `Device`, - // - the `action` and `data` pointers are the exact same ones as given to - // `devm_add_action()` previously, - (unsafe { - bindings::devm_remove_action_nowarn( - self.dev.as_raw(), - Some(self.callback), - core::ptr::from_ref(self.data()).cast_mut().cast(), - ) - } == 0) + // - `self.device().as_raw()` is a valid pointer to a bound `struct device`. + // - `self.inner.node.get()` is a valid pointer to a `struct devres_node`. 
+ unsafe { base::devres_node_remove(self.device().as_raw(), self.inner.node.get()) } } /// Return a reference of the [`Device`] this [`Devres`] instance has been created with. @@ -261,12 +354,12 @@ fn drop(&mut self) { // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data // anymore, hence it is safe not to wait for the grace period to finish. if unsafe { self.data().revoke_nosync() } { - // We revoked `self.data` before the devres action did, hence try to remove it. - if self.remove_action() { + // We revoked `self.data` before devres did, hence try to remove it. + if self.remove_node() { // SAFETY: In `Self::new` we have taken an additional reference count of `self.data` - // for `devm_add_action()`. Since `remove_action()` was successful, we have to drop + // for `devres_node_add()`. Since `remove_node()` was successful, we have to drop // this additional reference count. - drop(unsafe { Arc::from_raw(Arc::as_ptr(&self.data)) }); + drop(unsafe { Arc::from_raw(Arc::as_ptr(&self.inner)) }); } } } From f72e77c33e4b5657af35125e75bab249256030f3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 17 Mar 2026 09:01:20 -0700 Subject: [PATCH 49/67] device property: Make modifications of fwnode "flags" thread safe In various places in the kernel, we modify the fwnode "flags" member by doing either: fwnode->flags |= SOME_FLAG; fwnode->flags &= ~SOME_FLAG; This type of modification is not thread-safe. If two threads are both mucking with the flags at the same time then one can clobber the other. While flags are often modified while under the "fwnode_link_lock", this is not universally true. Create some accessor functions for setting, clearing, and testing the FWNODE flags and move all users to these accessor functions. New accessor functions use set_bit() and clear_bit(), which are thread-safe. 
Cc: stable@vger.kernel.org Fixes: c2c724c868c4 ("driver core: Add fw_devlink_parse_fwtree()") Reviewed-by: Andy Shevchenko Acked-by: Mark Brown Reviewed-by: Wolfram Sang Signed-off-by: Douglas Anderson Reviewed-by: Rafael J. Wysocki (Intel) Reviewed-by: Saravana Kannan Link: https://patch.msgid.link/20260317090112.v2.1.I0a4d03104ecd5103df3d76f66c8d21b1d15a2e38@changeid [ Fix fwnode_clear_flag() argument alignment, restore dropped blank line in fwnode_dev_initialized(), and remove unnecessary parentheses around fwnode_test_flag() calls. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/base/core.c | 24 ++++++++-------- drivers/bus/imx-weim.c | 2 +- drivers/i2c/i2c-core-of.c | 2 +- drivers/net/phy/mdio_bus_provider.c | 4 +-- drivers/of/base.c | 2 +- drivers/of/dynamic.c | 2 +- drivers/of/platform.c | 2 +- drivers/spi/spi.c | 2 +- include/linux/fwnode.h | 44 +++++++++++++++++++++-------- 9 files changed, 53 insertions(+), 31 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 5933852df837..763e17e9f148 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -182,7 +182,7 @@ void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) if (fwnode->dev) return; - fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; + fwnode_set_flag(fwnode, FWNODE_FLAG_NOT_DEVICE); fwnode_links_purge_consumers(fwnode); fwnode_for_each_available_child_node(fwnode, child) @@ -228,7 +228,7 @@ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, if (fwnode->dev && fwnode->dev->bus) return; - fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; + fwnode_set_flag(fwnode, FWNODE_FLAG_NOT_DEVICE); __fwnode_links_move_consumers(fwnode, new_sup); fwnode_for_each_available_child_node(fwnode, child) @@ -1012,7 +1012,7 @@ static void device_links_missing_supplier(struct device *dev) static bool dev_is_best_effort(struct device *dev) { return (fw_devlink_best_effort && dev->can_match) || - (dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT)); + 
(dev->fwnode && fwnode_test_flag(dev->fwnode, FWNODE_FLAG_BEST_EFFORT)); } static struct fwnode_handle *fwnode_links_check_suppliers( @@ -1723,11 +1723,11 @@ bool fw_devlink_is_strict(void) static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode) { - if (fwnode->flags & FWNODE_FLAG_LINKS_ADDED) + if (fwnode_test_flag(fwnode, FWNODE_FLAG_LINKS_ADDED)) return; fwnode_call_int_op(fwnode, add_links); - fwnode->flags |= FWNODE_FLAG_LINKS_ADDED; + fwnode_set_flag(fwnode, FWNODE_FLAG_LINKS_ADDED); } static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode) @@ -1885,7 +1885,7 @@ static bool fwnode_init_without_drv(struct fwnode_handle *fwnode) struct device *dev; bool ret; - if (!(fwnode->flags & FWNODE_FLAG_INITIALIZED)) + if (!fwnode_test_flag(fwnode, FWNODE_FLAG_INITIALIZED)) return false; dev = get_dev_from_fwnode(fwnode); @@ -2001,10 +2001,10 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, * We aren't trying to find all cycles. Just a cycle between con and * sup_handle. */ - if (sup_handle->flags & FWNODE_FLAG_VISITED) + if (fwnode_test_flag(sup_handle, FWNODE_FLAG_VISITED)) return false; - sup_handle->flags |= FWNODE_FLAG_VISITED; + fwnode_set_flag(sup_handle, FWNODE_FLAG_VISITED); /* Termination condition. */ if (sup_handle == con_handle) { @@ -2074,7 +2074,7 @@ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, } out: - sup_handle->flags &= ~FWNODE_FLAG_VISITED; + fwnode_clear_flag(sup_handle, FWNODE_FLAG_VISITED); put_device(sup_dev); put_device(con_dev); put_device(par_dev); @@ -2127,7 +2127,7 @@ static int fw_devlink_create_devlink(struct device *con, * When such a flag is set, we can't create device links where P is the * supplier of C as that would delay the probe of C. 
*/ - if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD && + if (fwnode_test_flag(sup_handle, FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD) && fwnode_is_ancestor_of(sup_handle, con->fwnode)) return -EINVAL; @@ -2150,7 +2150,7 @@ static int fw_devlink_create_devlink(struct device *con, else flags = FW_DEVLINK_FLAGS_PERMISSIVE; - if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE) + if (fwnode_test_flag(sup_handle, FWNODE_FLAG_NOT_DEVICE)) sup_dev = fwnode_get_next_parent_dev(sup_handle); else sup_dev = get_dev_from_fwnode(sup_handle); @@ -2162,7 +2162,7 @@ static int fw_devlink_create_devlink(struct device *con, * supplier device indefinitely. */ if (sup_dev->links.status == DL_DEV_NO_DRIVER && - sup_handle->flags & FWNODE_FLAG_INITIALIZED) { + fwnode_test_flag(sup_handle, FWNODE_FLAG_INITIALIZED)) { dev_dbg(con, "Not linking %pfwf - dev might never probe\n", sup_handle); diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c index 83d623d97f5f..f735e0462c55 100644 --- a/drivers/bus/imx-weim.c +++ b/drivers/bus/imx-weim.c @@ -332,7 +332,7 @@ static int of_weim_notify(struct notifier_block *nb, unsigned long action, * fw_devlink doesn't skip adding consumers to this * device. */ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); if (!of_platform_device_create(rd->dn, NULL, &pdev->dev)) { dev_err(&pdev->dev, "Failed to create child device '%pOF'\n", diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index eb7fb202355f..354a88d0599e 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -180,7 +180,7 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, * Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. 
*/ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); client = of_i2c_register_device(adap, rd->dn); if (IS_ERR(client)) { dev_err(&adap->dev, "failed to create client for '%pOF'\n", diff --git a/drivers/net/phy/mdio_bus_provider.c b/drivers/net/phy/mdio_bus_provider.c index 4b0637405740..fd691c5424ea 100644 --- a/drivers/net/phy/mdio_bus_provider.c +++ b/drivers/net/phy/mdio_bus_provider.c @@ -294,8 +294,8 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) return -EINVAL; if (bus->parent && bus->parent->of_node) - bus->parent->of_node->fwnode.flags |= - FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD; + fwnode_set_flag(&bus->parent->of_node->fwnode, + FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD); WARN(bus->state != MDIOBUS_ALLOCATED && bus->state != MDIOBUS_UNREGISTERED, diff --git a/drivers/of/base.c b/drivers/of/base.c index bf4a51887d74..180dbce65b98 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1943,7 +1943,7 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) if (name) of_stdout = of_find_node_opts_by_path(name, &of_stdout_options); if (of_stdout) - of_stdout->fwnode.flags |= FWNODE_FLAG_BEST_EFFORT; + fwnode_set_flag(&of_stdout->fwnode, FWNODE_FLAG_BEST_EFFORT); } if (!of_aliases) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 1a06175def37..ade288372101 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -225,7 +225,7 @@ static void __of_attach_node(struct device_node *np) np->sibling = np->parent->child; np->parent->child = np; of_node_clear_flag(np, OF_DETACHED); - np->fwnode.flags |= FWNODE_FLAG_NOT_DEVICE; + fwnode_set_flag(&np->fwnode, FWNODE_FLAG_NOT_DEVICE); raw_spin_unlock_irqrestore(&devtree_lock, flags); diff --git a/drivers/of/platform.c b/drivers/of/platform.c index ba591fbceb56..7eeaf8e27b5b 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -742,7 +742,7 @@ static int of_platform_notify(struct notifier_block *nb, * 
Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. */ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); /* pdev_parent may be NULL when no bus platform device */ pdev_parent = of_find_device_by_node(parent); pdev = of_platform_device_create(rd->dn, NULL, diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 53dee314d76a..e686ecaf3dc8 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4937,7 +4937,7 @@ static int of_spi_notify(struct notifier_block *nb, unsigned long action, * Clear the flag before adding the device so that fw_devlink * doesn't skip adding consumers to this device. */ - rd->dn->fwnode.flags &= ~FWNODE_FLAG_NOT_DEVICE; + fwnode_clear_flag(&rd->dn->fwnode, FWNODE_FLAG_NOT_DEVICE); spi = of_register_spi_device(ctlr, rd->dn); put_device(&ctlr->dev); diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 097be89487bf..80b38fbf2121 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -15,6 +15,7 @@ #define _LINUX_FWNODE_H_ #include +#include #include #include #include @@ -42,12 +43,12 @@ struct device; * suppliers. Only enforce ordering with suppliers that have * drivers. 
*/ -#define FWNODE_FLAG_LINKS_ADDED BIT(0) -#define FWNODE_FLAG_NOT_DEVICE BIT(1) -#define FWNODE_FLAG_INITIALIZED BIT(2) -#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) -#define FWNODE_FLAG_BEST_EFFORT BIT(4) -#define FWNODE_FLAG_VISITED BIT(5) +#define FWNODE_FLAG_LINKS_ADDED 0 +#define FWNODE_FLAG_NOT_DEVICE 1 +#define FWNODE_FLAG_INITIALIZED 2 +#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD 3 +#define FWNODE_FLAG_BEST_EFFORT 4 +#define FWNODE_FLAG_VISITED 5 struct fwnode_handle { struct fwnode_handle *secondary; @@ -57,7 +58,7 @@ struct fwnode_handle { struct device *dev; struct list_head suppliers; struct list_head consumers; - u8 flags; + unsigned long flags; }; /* @@ -212,16 +213,37 @@ static inline void fwnode_init(struct fwnode_handle *fwnode, INIT_LIST_HEAD(&fwnode->suppliers); } +static inline void fwnode_set_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + set_bit(bit, &fwnode->flags); +} + +static inline void fwnode_clear_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + clear_bit(bit, &fwnode->flags); +} + +static inline void fwnode_assign_flag(struct fwnode_handle *fwnode, + unsigned int bit, bool value) +{ + assign_bit(bit, &fwnode->flags, value); +} + +static inline bool fwnode_test_flag(struct fwnode_handle *fwnode, + unsigned int bit) +{ + return test_bit(bit, &fwnode->flags); +} + static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, bool initialized) { if (IS_ERR_OR_NULL(fwnode)) return; - if (initialized) - fwnode->flags |= FWNODE_FLAG_INITIALIZED; - else - fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; + fwnode_assign_flag(fwnode, FWNODE_FLAG_INITIALIZED, initialized); } int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, From a1fa010b3c9f0adae252986ddb54a449df45f700 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Fri, 27 Mar 2026 10:51:59 +0100 Subject: [PATCH 50/67] driver core: auxiliary bus: Drop auxiliary_dev_pm_ops Since the PM core automatically falls back to using the driver PM callbacks directly if no bus type callbacks are present, it is not necessary to define a struct dev_pm_ops for a bus type that will only invoke driver PM callbacks from its PM callbacks. Accordingly, auxiliary_dev_pm_ops is redundant, so drop it. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/4738700.LvFx2qVVIh@rafael.j.wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 9fd3820d1f8a..16b8bdab30c8 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -207,11 +207,6 @@ static int auxiliary_uevent(const struct device *dev, struct kobj_uevent_env *en (int)(p - name), name); } -static const struct dev_pm_ops auxiliary_dev_pm_ops = { - SET_RUNTIME_PM_OPS(pm_generic_runtime_suspend, pm_generic_runtime_resume, NULL) - SET_SYSTEM_SLEEP_PM_OPS(pm_generic_suspend, pm_generic_resume) -}; - static int auxiliary_bus_probe(struct device *dev) { const struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); @@ -258,7 +253,6 @@ static const struct bus_type auxiliary_bus_type = { .shutdown = auxiliary_bus_shutdown, .match = auxiliary_match, .uevent = auxiliary_uevent, - .pm = &auxiliary_dev_pm_ops, }; /** From 3210dabba4e4aff7395b68914bad95153dda6db7 Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Wed, 25 Mar 2026 17:09:05 +0800 Subject: [PATCH 51/67] driver core: simplify __device_set_driver_override() clearing logic Currently, __device_set_driver_override() handles clearing the override via empty string ("") and newline ("\n") in two separate paths. The "\n" case also performs an unnecessary memory allocation and immediate free. 
Simplify the logic by initializing 'new' to NULL and only allocating memory if the string length remains non-zero after stripping the trailing newline. Reduce code size, improve readability, and avoid unnecessary memory operations. No functional change intended. Suggested-by: Geert Uytterhoeven Link: https://lore.kernel.org/driver-core/DGS82WWLXPJ0.2EH4VJSF30UR5@kernel.org/ Signed-off-by: Gui-Dong Han Link: https://patch.msgid.link/20260325090905.169000-1-hanguidong02@gmail.com [ Narrow cp's scope to the newline handling block; use scoped_guard(). - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/base/dd.c | 52 ++++++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 37c7e54e0e4c..a46e28236068 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -383,8 +383,7 @@ __exitcall(deferred_probe_exit); int __device_set_driver_override(struct device *dev, const char *s, size_t len) { - const char *new, *old; - char *cp; + const char *new = NULL, *old; if (!s) return -EINVAL; @@ -404,37 +403,30 @@ int __device_set_driver_override(struct device *dev, const char *s, size_t len) */ len = strlen(s); - if (!len) { - /* Empty string passed - clear override */ - spin_lock(&dev->driver_override.lock); + /* Handle trailing newline */ + if (len) { + char *cp; + + cp = strnchr(s, len, '\n'); + if (cp) + len = cp - s; + } + + /* + * If empty string or "\n" passed, new remains NULL, clearing + * the driver_override.name. 
+ */ + if (len) { + new = kstrndup(s, len, GFP_KERNEL); + if (!new) + return -ENOMEM; + } + + scoped_guard(spinlock, &dev->driver_override.lock) { old = dev->driver_override.name; - dev->driver_override.name = NULL; - spin_unlock(&dev->driver_override.lock); - kfree(old); - - return 0; - } - - cp = strnchr(s, len, '\n'); - if (cp) - len = cp - s; - - new = kstrndup(s, len, GFP_KERNEL); - if (!new) - return -ENOMEM; - - spin_lock(&dev->driver_override.lock); - old = dev->driver_override.name; - if (cp != s) { dev->driver_override.name = new; - spin_unlock(&dev->driver_override.lock); - } else { - /* "\n" passed - clear override */ - dev->driver_override.name = NULL; - spin_unlock(&dev->driver_override.lock); - - kfree(new); } + kfree(old); return 0; From 56e3ee721b33bdc4ce0765d370983aa4384f8a59 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 14 Mar 2026 09:49:16 +0100 Subject: [PATCH 52/67] driver core: Make deferred_probe_timeout default a Kconfig option Code using driver_deferred_probe_check_state() differs from most EPROBE_DEFER handling in the kernel. Where other EPROBE_DEFER handling (e.g. clks, gpios and regulators) waits indefinitely for suppliers to show up, code using driver_deferred_probe_check_state() will fail after the deferred_probe_timeout. This is a problem for generic distro kernels which want to support many boards using a single kernel build. These kernels want as many drivers to be modular as possible. The initrd also should be as small as possible, so the initrd will *not* have drivers not needing to get the rootfs. Combine this with waiting for a full-disk encryption password in the initrd and it is pretty much guaranteed that the default 10s timeout will be hit, causing probe() failures when drivers on the rootfs happen to get modprobe-d before other rootfs modules providing their suppliers.
Make the default timeout configurable from Kconfig to allow distro kernel configs where many of the supplier drivers are modules to set the default through Kconfig. Reviewed-by: Saravana Kannan Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20260314084916.10868-1-johannes.goede@oss.qualcomm.com [ Drop deferred_probe_timeout documentation change in kernel-parameters.txt. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/base/Kconfig | 9 +++++++++ drivers/base/dd.c | 6 +----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 1786d87b29e2..f7d385cbd3ba 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -73,6 +73,15 @@ config DEVTMPFS_SAFE with the PROT_EXEC flag. This can break, for example, non-KMS video drivers. +config DRIVER_DEFERRED_PROBE_TIMEOUT + int "Default value for deferred_probe_timeout" + default 0 if !MODULES + default 10 if MODULES + help + Set the default value for the deferred_probe_timeout kernel parameter. + See Documentation/admin-guide/kernel-parameters.txt for a description + of the deferred_probe_timeout kernel parameter. 
+ config STANDALONE bool "Select only drivers that don't need compile-time external firmware" default y diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a46e28236068..cb5046f0634d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -257,11 +257,7 @@ static int deferred_devs_show(struct seq_file *s, void *data) } DEFINE_SHOW_ATTRIBUTE(deferred_devs); -#ifdef CONFIG_MODULES -static int driver_deferred_probe_timeout = 10; -#else -static int driver_deferred_probe_timeout; -#endif +static int driver_deferred_probe_timeout = CONFIG_DRIVER_DEFERRED_PROBE_TIMEOUT; static int __init deferred_probe_timeout_setup(char *str) { From 31de83980d3764d784f79ff1bc93c42b324f4013 Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Mon, 23 Mar 2026 16:58:44 +0800 Subject: [PATCH 53/67] debugfs: check for NULL pointer in debugfs_create_str() Passing a NULL pointer to debugfs_create_str() leads to a NULL pointer dereference when the debugfs file is read. Following upstream discussions, forbid the creation of debugfs string files with NULL pointers. Add a WARN_ON() to expose offending callers and return early. Fixes: 9af0440ec86e ("debugfs: Implement debugfs_create_str()") Reported-by: yangshiguang Closes: https://lore.kernel.org/lkml/2025122221-gag-malt-75ba@gregkh/ Suggested-by: Greg Kroah-Hartman Signed-off-by: Gui-Dong Han Link: https://patch.msgid.link/20260323085930.88894-2-hanguidong02@gmail.com Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 3376ab6a519d..a941d73251b0 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1127,7 +1127,7 @@ static const struct file_operations fops_str_wo = { * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write - * from. + * from. 
This pointer and the string it points to must not be %NULL. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so @@ -1136,6 +1136,9 @@ static const struct file_operations fops_str_wo = { void debugfs_create_str(const char *name, umode_t mode, struct dentry *parent, char **value) { + if (WARN_ON(!value || !*value)) + return; + debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str, &fops_str_ro, &fops_str_wo); } From 4afc929c0f74c4f22b055a82b371d50586da58ca Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Mon, 23 Mar 2026 16:58:45 +0800 Subject: [PATCH 54/67] debugfs: fix placement of EXPORT_SYMBOL_GPL for debugfs_create_str() The EXPORT_SYMBOL_GPL() for debugfs_create_str was placed incorrectly away from the function definition. Move it immediately below the debugfs_create_str() function where it belongs. Fixes: d60b59b96795 ("debugfs: Export debugfs_create_str symbol") Signed-off-by: Gui-Dong Han Link: https://patch.msgid.link/20260323085930.88894-3-hanguidong02@gmail.com Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index a941d73251b0..edd6aafbfbaa 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -1047,7 +1047,6 @@ ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, return ret; } -EXPORT_SYMBOL_GPL(debugfs_create_str); static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) @@ -1142,6 +1141,7 @@ void debugfs_create_str(const char *name, umode_t mode, debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str, &fops_str_ro, &fops_str_wo); } +EXPORT_SYMBOL_GPL(debugfs_create_str); static ssize_t read_file_blob(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) From 7215e4552f31e53595eae56a834f7e286beecccc Mon Sep 17 00:00:00 2001 From: Gui-Dong 
Han Date: Mon, 23 Mar 2026 16:58:46 +0800 Subject: [PATCH 55/67] soundwire: debugfs: initialize firmware_file to empty string Passing NULL to debugfs_create_str() causes a NULL pointer dereference, and creating debugfs nodes with NULL string pointers is no longer permitted. Additionally, firmware_file is a global pointer. Previously, adding every new slave blindly overwrote it with NULL. Fix these issues by initializing firmware_file to an allocated empty string once in the subsystem init path (sdw_debugfs_init), and freeing it in the exit path. Existing driver code handles empty strings correctly. Fixes: fe46d2a4301d ("soundwire: debugfs: add interface to read/write commands") Reported-by: yangshiguang Closes: https://lore.kernel.org/lkml/17647e4c.d461.19b46144a4e.Coremail.yangshiguang1011@163.com/ Signed-off-by: Gui-Dong Han Link: https://patch.msgid.link/20260323085930.88894-4-hanguidong02@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/soundwire/debugfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c index ccc9670ef77c..2905ec19b838 100644 --- a/drivers/soundwire/debugfs.c +++ b/drivers/soundwire/debugfs.c @@ -358,8 +358,8 @@ void sdw_slave_debugfs_init(struct sdw_slave *slave) debugfs_create_file("go", 0200, d, slave, &cmd_go_fops); debugfs_create_file("read_buffer", 0400, d, slave, &read_buffer_fops); - firmware_file = NULL; - debugfs_create_str("firmware_file", 0200, d, &firmware_file); + if (firmware_file) + debugfs_create_str("firmware_file", 0200, d, &firmware_file); slave->debugfs = d; } @@ -371,10 +371,15 @@ void sdw_slave_debugfs_exit(struct sdw_slave *slave) void sdw_debugfs_init(void) { + if (!firmware_file) + firmware_file = kstrdup("", GFP_KERNEL); + sdw_debugfs_root = debugfs_create_dir("soundwire", NULL); } void sdw_debugfs_exit(void) { debugfs_remove_recursive(sdw_debugfs_root); + kfree(firmware_file); + firmware_file = NULL; } From 
70fa0c308aa2db6859c2ea22473d421bdde56668 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 18 Mar 2026 15:21:40 +0100 Subject: [PATCH 56/67] device property: Document how to check for the property presence Currently it's unclear if one may or may not rely on the error codes returned from the property getters to check for the property presence. Clarify this by updating kernel-doc for fwnode_property_*() and device_property_*() where it's applicable. Reported-by: Guenter Roeck Closes: https://lore.kernel.org/r/4b24f1f4-b395-467a-81b7-1334a2d48845@roeck-us.net Signed-off-by: Andy Shevchenko Reviewed-by: Sakari Ailus Link: https://patch.msgid.link/20260318142404.2526642-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 8d9a34be57fb..d16e9c5f1921 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -38,6 +38,8 @@ EXPORT_SYMBOL_GPL(__dev_fwnode_const); * @propname: Name of the property * * Check if property @propname is present in the device firmware description. + * This function is the unambiguous way to check that given property is present + * in the device firmware description. * * Return: true if property @propname is present. Otherwise, returns false. */ @@ -52,6 +54,10 @@ EXPORT_SYMBOL_GPL(device_property_present); * @fwnode: Firmware node whose property to check * @propname: Name of the property * + * Check if property @propname is present in the firmware node description. + * This function is the unambiguous way to check that given property is present + * in the firmware node description. + * * Return: true if property @propname is present. Otherwise, returns false. 
*/ bool fwnode_property_present(const struct fwnode_handle *fwnode, @@ -75,9 +81,9 @@ EXPORT_SYMBOL_GPL(fwnode_property_present); * @dev: Device whose property is being checked * @propname: Name of the property * - * Return if property @propname is true or false in the device firmware description. + * Use device_property_present() to check for the property presence. * - * Return: true if property @propname is present. Otherwise, returns false. + * Return: if property @propname is true or false in the device firmware description. */ bool device_property_read_bool(const struct device *dev, const char *propname) { @@ -90,7 +96,9 @@ EXPORT_SYMBOL_GPL(device_property_read_bool); * @fwnode: Firmware node whose property to check * @propname: Name of the property * - * Return if property @propname is true or false in the firmware description. + * Use fwnode_property_present() to check for the property presence. + * + * Return: if property @propname is true or false in the firmware node description. */ bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) From e4cabc973133c0623a63e6e3bc3d64e53a2e6d62 Mon Sep 17 00:00:00 2001 From: Kexin Sun Date: Sat, 21 Mar 2026 18:57:04 +0800 Subject: [PATCH 57/67] drivers/base/memory: fix stale reference to memory_block_add_nid() The function memory_block_add_nid() was renamed to memory_block_add_nid_early() by commit 0a947c14e48c ("drivers/base: move memory_block_add_nid() into the caller"). Update the stale reference in add_memory_block(). 
Assisted-by: unnamed:deepseek-v3.2 coccinelle Signed-off-by: Kexin Sun Reviewed-by: David Hildenbrand (Arm) Link: https://patch.msgid.link/20260321105704.6093-1-kexinsun@smail.nju.edu.cn Signed-off-by: Greg Kroah-Hartman --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index a3091924918b..0d6ccc7cdf05 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -815,7 +815,7 @@ static int add_memory_block(unsigned long block_id, int nid, unsigned long state /* * MEM_ONLINE at this point implies early memory. With NUMA, * we'll determine the zone when setting the node id via - * memory_block_add_nid(). Memory hotplug updated the zone + * memory_block_add_nid_early(). Memory hotplug updated the zone * manually when memory onlining/offlining succeeds. */ mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE); From 704b2a7d756d0886a1388456ab41415a45973588 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 3 Apr 2026 10:09:17 +0200 Subject: [PATCH 58/67] MAINTAINERS: add ksysfs.c to the DRIVER CORE entry kernel/ksysfs.c is part of the driver core infrastructure but is missing from the relevant MAINTAINERS entry. Add it.
Suggested-by: Danilo Krummrich Closes: https://lore.kernel.org/all/DHITFGL3J4IE.2WZU3K2CSYL7I@kernel.org/ Signed-off-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260403080917.7484-1-bartosz.golaszewski@oss.qualcomm.com Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7d10988cbc62..2695f321bfd7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7807,6 +7807,7 @@ F: include/linux/fwnode.h F: include/linux/kobj* F: include/linux/property.h F: include/linux/sysfs.h +F: kernel/ksysfs.c F: lib/kobj* F: rust/kernel/debugfs.rs F: rust/kernel/debugfs/ From 9617b5b62c7cf4284740ba5efdbf083aa5a87e5f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 2 Apr 2026 16:15:02 +0200 Subject: [PATCH 59/67] kernel: ksysfs: initialize kernel_kobj earlier Software nodes depend on kernel_kobj which is initialized pretty late into the boot process - as a core_initcall(). Ahead of moving the software node initialization to driver_init() we must first make kernel_kobj available before it. Make ksysfs_init() visible in a new header - ksysfs.h - and call it in do_basic_setup() right before driver_init(). 
Signed-off-by: Bartosz Golaszewski Link: https://patch.msgid.link/20260402-nokia770-gpio-swnodes-v5-1-d730db3dd299@oss.qualcomm.com Signed-off-by: Danilo Krummrich --- MAINTAINERS | 1 + include/linux/ksysfs.h | 8 ++++++++ init/main.c | 2 ++ kernel/ksysfs.c | 9 ++++----- 4 files changed, 15 insertions(+), 5 deletions(-) create mode 100644 include/linux/ksysfs.h diff --git a/MAINTAINERS b/MAINTAINERS index 2695f321bfd7..fe7516481a44 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7805,6 +7805,7 @@ F: include/linux/debugfs.h F: include/linux/device.h F: include/linux/fwnode.h F: include/linux/kobj* +F: include/linux/ksysfs.h F: include/linux/property.h F: include/linux/sysfs.h F: kernel/ksysfs.c diff --git a/include/linux/ksysfs.h b/include/linux/ksysfs.h new file mode 100644 index 000000000000..c7dc6e18f28e --- /dev/null +++ b/include/linux/ksysfs.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _KSYSFS_H_ +#define _KSYSFS_H_ + +void ksysfs_init(void); + +#endif /* _KSYSFS_H_ */ diff --git a/init/main.c b/init/main.c index 1cb395dd94e4..2c9cce0bfe86 100644 --- a/init/main.c +++ b/init/main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1473,6 +1474,7 @@ static void __init do_initcalls(void) static void __init do_basic_setup(void) { cpuset_init_smp(); + ksysfs_init(); driver_init(); init_irq_proc(); do_ctors(); diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index a9e6354d9e25..f45ade718054 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -213,7 +214,7 @@ static const struct attribute_group kernel_attr_group = { .attrs = kernel_attrs, }; -static int __init ksysfs_init(void) +void __init ksysfs_init(void) { int error; @@ -234,14 +235,12 @@ static int __init ksysfs_init(void) goto group_exit; } - return 0; + return; group_exit: sysfs_remove_group(kernel_kobj, &kernel_attr_group); kset_exit: kobject_put(kernel_kobj); exit: - return error; 
+ pr_err("failed to initialize the kernel kobject: %d\n", error); } - -core_initcall(ksysfs_init); From b3eb6a25b969910852bacbed9408afba4fc09c91 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 2 Apr 2026 16:15:03 +0200 Subject: [PATCH 60/67] software node: remove software_node_exit() software_node_exit() is an __exitcall() in a built-in compilation unit so effectively dead code. Remove it. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20260402-nokia770-gpio-swnodes-v5-2-d730db3dd299@oss.qualcomm.com Signed-off-by: Danilo Krummrich --- drivers/base/swnode.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 51320837f3a9..26cbe1c2a2e0 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1135,10 +1135,3 @@ static int __init software_node_init(void) return 0; } postcore_initcall(software_node_init); - -static void __exit software_node_exit(void) -{ - ida_destroy(&swnode_root_ids); - kset_unregister(swnode_kset); -} -__exitcall(software_node_exit); From 1cf996ac307e4d8d86f07d72b55528df88b56ce6 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 2 Apr 2026 16:15:04 +0200 Subject: [PATCH 61/67] driver core: make software nodes available earlier Software nodes are currently initialized in a function registered as a postcore_initcall(). However, some devices may want to register software nodes earlier than that (or also in a postcore_initcall() where they're at the mercy of the link order). Move the initialization to driver_init() making swnode available much earlier as well as making their initialization time deterministic. Suggested-by: Dmitry Torokhov Signed-off-by: Bartosz Golaszewski Acked-by: Andy Shevchenko Link: https://patch.msgid.link/20260402-nokia770-gpio-swnodes-v5-3-d730db3dd299@oss.qualcomm.com [ Fix typo in the commit message: "s/merci/mercy/". 
- Danilo ] Signed-off-by: Danilo Krummrich --- drivers/base/base.h | 1 + drivers/base/init.c | 1 + drivers/base/swnode.c | 6 ++---- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/base/base.h b/drivers/base/base.h index afeda89fd471..30b416588617 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -312,6 +312,7 @@ static inline int devtmpfs_create_node(struct device *dev) { return 0; } static inline int devtmpfs_delete_node(struct device *dev) { return 0; } #endif +void software_node_init(void); void software_node_notify(struct device *dev); void software_node_notify_remove(struct device *dev); diff --git a/drivers/base/init.c b/drivers/base/init.c index 9d2b06d65dfc..af8014416c24 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -34,6 +34,7 @@ void __init driver_init(void) */ faux_bus_init(); of_core_init(); + software_node_init(); platform_bus_init(); auxiliary_bus_init(); memory_dev_init(); diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 26cbe1c2a2e0..a80575bf598b 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1127,11 +1127,9 @@ void software_node_notify_remove(struct device *dev) } } -static int __init software_node_init(void) +void __init software_node_init(void) { swnode_kset = kset_create_and_add("software_nodes", NULL, kernel_kobj); if (!swnode_kset) - return -ENOMEM; - return 0; + pr_err("failed to register software nodes\n"); } -postcore_initcall(software_node_init); From 10a4206a24013be4d558d476010cbf2eb4c9fa64 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Mar 2026 01:59:09 +0100 Subject: [PATCH 62/67] PCI: use generic driver_override infrastructure When a driver is probed through __driver_attach(), the bus' match() callback is called without the device lock held, thus accessing the driver_override field without a lock, which can cause a UAF. Fix this by using the driver-core driver_override infrastructure taking care of proper locking internally. 
Note that calling match() from __driver_attach() without the device lock held is intentional. [1] Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1] Reported-by: Gui-Dong Han Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789 Fixes: 782a985d7af2 ("PCI: Introduce new device binding path using pci_dev.driver_override") Acked-by: Bjorn Helgaas Acked-by: Alex Williamson Tested-by: Gui-Dong Han Reviewed-by: Gui-Dong Han Link: https://patch.msgid.link/20260324005919.2408620-6-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/pci/pci-driver.c | 11 +++++++---- drivers/pci/pci-sysfs.c | 28 ---------------------------- drivers/pci/probe.c | 1 - drivers/vfio/pci/vfio_pci_core.c | 5 ++--- drivers/xen/xen-pciback/pci_stub.c | 6 ++++-- include/linux/pci.h | 6 ------ 6 files changed, 13 insertions(+), 44 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index dd9075403987..d10ece0889f0 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -138,9 +138,11 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, { struct pci_dynid *dynid; const struct pci_device_id *found_id = NULL, *ids; + int ret; /* When driver_override is set, only bind to the matching driver */ - if (dev->driver_override && strcmp(dev->driver_override, drv->name)) + ret = device_match_driver_override(&dev->dev, &drv->driver); + if (ret == 0) return NULL; /* Look at the dynamic ids first, before the static ones */ @@ -164,7 +166,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, * matching. 
*/ if (found_id->override_only) { - if (dev->driver_override) + if (ret > 0) return found_id; } else { return found_id; @@ -172,7 +174,7 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv, } /* driver_override will always match, send a dummy id */ - if (dev->driver_override) + if (ret > 0) return &pci_device_id_any; return NULL; } @@ -452,7 +454,7 @@ static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev) static inline bool pci_device_can_probe(struct pci_dev *pdev) { return (!pdev->is_virtfn || pdev->physfn->sriov->drivers_autoprobe || - pdev->driver_override); + device_has_driver_override(&pdev->dev)); } #else static inline bool pci_device_can_probe(struct pci_dev *pdev) @@ -1722,6 +1724,7 @@ static const struct cpumask *pci_device_irq_get_affinity(struct device *dev, const struct bus_type pci_bus_type = { .name = "pci", + .driver_override = true, .match = pci_bus_match, .uevent = pci_uevent, .probe = pci_device_probe, diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 16eaaf749ba9..a9006cf4e9c8 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -615,33 +615,6 @@ static ssize_t devspec_show(struct device *dev, static DEVICE_ATTR_RO(devspec); #endif -static ssize_t driver_override_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = driver_set_override(dev, &pdev->driver_override, buf, count); - if (ret) - return ret; - - return count; -} - -static ssize_t driver_override_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct pci_dev *pdev = to_pci_dev(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", pdev->driver_override); - device_unlock(dev); - return len; -} -static DEVICE_ATTR_RW(driver_override); - static struct attribute *pci_dev_attrs[] = { &dev_attr_power_state.attr, &dev_attr_resource.attr, @@ -669,7 +642,6 @@ 
static struct attribute *pci_dev_attrs[] = { #ifdef CONFIG_OF &dev_attr_devspec.attr, #endif - &dev_attr_driver_override.attr, &dev_attr_ari_enabled.attr, NULL, }; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index bccc7a4bdd79..b4707640e102 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2488,7 +2488,6 @@ static void pci_release_dev(struct device *dev) pci_release_of_node(pci_dev); pcibios_release_device(pci_dev); pci_bus_put(pci_dev->bus); - kfree(pci_dev->driver_override); bitmap_free(pci_dev->dma_alias_mask); dev_dbg(dev, "device released\n"); kfree(pci_dev); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index d43745fe4c84..460852f79f29 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1987,9 +1987,8 @@ static int vfio_pci_bus_notifier(struct notifier_block *nb, pdev->is_virtfn && physfn == vdev->pdev) { pci_info(vdev->pdev, "Captured SR-IOV VF %s driver_override\n", pci_name(pdev)); - pdev->driver_override = kasprintf(GFP_KERNEL, "%s", - vdev->vdev.ops->name); - WARN_ON(!pdev->driver_override); + WARN_ON(device_set_driver_override(&pdev->dev, + vdev->vdev.ops->name)); } else if (action == BUS_NOTIFY_BOUND_DRIVER && pdev->is_virtfn && physfn == vdev->pdev) { struct pci_driver *drv = pci_dev_driver(pdev); diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index e4b27aecbf05..79a2b5dfd694 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -598,6 +598,8 @@ static int pcistub_seize(struct pci_dev *dev, return err; } +static struct pci_driver xen_pcibk_pci_driver; + /* Called when 'bind'. This means we must _NOT_ call pci_reset_function or * other functions that take the sysfs lock. 
*/ static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) @@ -609,8 +611,8 @@ static int pcistub_probe(struct pci_dev *dev, const struct pci_device_id *id) match = pcistub_match(dev); - if ((dev->driver_override && - !strcmp(dev->driver_override, PCISTUB_DRIVER_NAME)) || + if (device_match_driver_override(&dev->dev, + &xen_pcibk_pci_driver.driver) > 0 || match) { if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL diff --git a/include/linux/pci.h b/include/linux/pci.h index 1c270f1d5123..57e9463e4347 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -575,12 +575,6 @@ struct pci_dev { u8 supported_speeds; /* Supported Link Speeds Vector */ phys_addr_t rom; /* Physical address if not from BAR */ size_t romlen; /* Length if not from BAR */ - /* - * Driver name to force a match. Do not set directly, because core - * frees it. Use driver_set_override() to set or clear it. - */ - const char *driver_override; - unsigned long priv_flags; /* Private flags for the PCI driver */ /* These methods index pci_reset_fn_methods[] */ From 8a700b1fc94df4d847a04f14ebc7f8532592b367 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Mar 2026 01:59:10 +0100 Subject: [PATCH 63/67] platform/wmi: use generic driver_override infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a driver is probed through __driver_attach(), the bus' match() callback is called without the device lock held, thus accessing the driver_override field without a lock, which can cause a UAF. Fix this by using the driver-core driver_override infrastructure taking care of proper locking internally. Note that calling match() from __driver_attach() without the device lock held is intentional. 
[1] Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1] Reported-by: Gui-Dong Han Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789 Fixes: 12046f8c77e0 ("platform/x86: wmi: Add driver_override support") Reviewed-by: Armin Wolf Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20260324005919.2408620-7-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/platform/wmi/core.c | 36 +++++------------------------------- include/linux/wmi.h | 4 ---- 2 files changed, 5 insertions(+), 35 deletions(-) diff --git a/drivers/platform/wmi/core.c b/drivers/platform/wmi/core.c index b8e6b9a421c6..750e3619724e 100644 --- a/drivers/platform/wmi/core.c +++ b/drivers/platform/wmi/core.c @@ -842,39 +842,11 @@ static ssize_t expensive_show(struct device *dev, } static DEVICE_ATTR_RO(expensive); -static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct wmi_device *wdev = to_wmi_device(dev); - ssize_t ret; - - device_lock(dev); - ret = sysfs_emit(buf, "%s\n", wdev->driver_override); - device_unlock(dev); - - return ret; -} - -static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct wmi_device *wdev = to_wmi_device(dev); - int ret; - - ret = driver_set_override(dev, &wdev->driver_override, buf, count); - if (ret < 0) - return ret; - - return count; -} -static DEVICE_ATTR_RW(driver_override); - static struct attribute *wmi_attrs[] = { &dev_attr_modalias.attr, &dev_attr_guid.attr, &dev_attr_instance_count.attr, &dev_attr_expensive.attr, - &dev_attr_driver_override.attr, NULL }; ATTRIBUTE_GROUPS(wmi); @@ -943,7 +915,6 @@ static void wmi_dev_release(struct device *dev) { struct wmi_block *wblock = dev_to_wblock(dev); - kfree(wblock->dev.driver_override); kfree(wblock); } @@ -952,10 +923,12 @@ static int wmi_dev_match(struct device *dev, const struct device_driver *driver) const struct wmi_driver *wmi_driver = 
to_wmi_driver(driver); struct wmi_block *wblock = dev_to_wblock(dev); const struct wmi_device_id *id = wmi_driver->id_table; + int ret; /* When driver_override is set, only bind to the matching driver */ - if (wblock->dev.driver_override) - return !strcmp(wblock->dev.driver_override, driver->name); + ret = device_match_driver_override(dev, driver); + if (ret >= 0) + return ret; if (id == NULL) return 0; @@ -1076,6 +1049,7 @@ static struct class wmi_bus_class = { static const struct bus_type wmi_bus_type = { .name = "wmi", .dev_groups = wmi_groups, + .driver_override = true, .match = wmi_dev_match, .uevent = wmi_dev_uevent, .probe = wmi_dev_probe, diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 75cb0c7cfe57..14fb644e1701 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -18,16 +18,12 @@ * struct wmi_device - WMI device structure * @dev: Device associated with this WMI device * @setable: True for devices implementing the Set Control Method - * @driver_override: Driver name to force a match; do not set directly, - * because core frees it; use driver_set_override() to - * set or clear it. * * This represents WMI devices discovered by the WMI driver core. */ struct wmi_device { struct device dev; bool setable; - const char *driver_override; }; /** From 85bb534ff12aab6916058897b39c748940a7a4c6 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Mar 2026 01:59:12 +0100 Subject: [PATCH 64/67] vdpa: use generic driver_override infrastructure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a driver is probed through __driver_attach(), the bus' match() callback is called without the device lock held, thus accessing the driver_override field without a lock, which can cause a UAF. Fix this by using the driver-core driver_override infrastructure taking care of proper locking internally. Note that calling match() from __driver_attach() without the device lock held is intentional. 
[1] Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1] Reported-by: Gui-Dong Han Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789 Fixes: 539fec78edb4 ("vdpa: add driver_override support") Acked-by: Eugenio Pérez Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20260324005919.2408620-9-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/vdpa/vdpa.c | 48 +++++--------------------------------------- include/linux/vdpa.h | 4 ---- 2 files changed, 5 insertions(+), 47 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 34874beb0152..caf0ee5d6856 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -67,57 +67,20 @@ static void vdpa_dev_remove(struct device *d) static int vdpa_dev_match(struct device *dev, const struct device_driver *drv) { - struct vdpa_device *vdev = dev_to_vdpa(dev); + int ret; /* Check override first, and if set, only use the named driver */ - if (vdev->driver_override) - return strcmp(vdev->driver_override, drv->name) == 0; + ret = device_match_driver_override(dev, drv); + if (ret >= 0) + return ret; /* Currently devices must be supported by all vDPA bus drivers */ return 1; } -static ssize_t driver_override_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct vdpa_device *vdev = dev_to_vdpa(dev); - int ret; - - ret = driver_set_override(dev, &vdev->driver_override, buf, count); - if (ret) - return ret; - - return count; -} - -static ssize_t driver_override_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct vdpa_device *vdev = dev_to_vdpa(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", vdev->driver_override); - device_unlock(dev); - - return len; -} -static DEVICE_ATTR_RW(driver_override); - -static struct attribute *vdpa_dev_attrs[] = { - &dev_attr_driver_override.attr, - NULL, -}; - -static const struct attribute_group vdpa_dev_group = { - .attrs = 
vdpa_dev_attrs, -}; -__ATTRIBUTE_GROUPS(vdpa_dev); - static const struct bus_type vdpa_bus = { .name = "vdpa", - .dev_groups = vdpa_dev_groups, + .driver_override = true, .match = vdpa_dev_match, .probe = vdpa_dev_probe, .remove = vdpa_dev_remove, @@ -132,7 +95,6 @@ static void vdpa_release_dev(struct device *d) ops->free(vdev); ida_free(&vdpa_index_ida, vdev->index); - kfree(vdev->driver_override); kfree(vdev); } diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 2bfe3baa63f4..782c42d25db1 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -72,9 +72,6 @@ struct vdpa_mgmt_dev; * struct vdpa_device - representation of a vDPA device * @dev: underlying device * @vmap: the metadata passed to upper layer to be used for mapping - * @driver_override: driver name to force a match; do not set directly, - * because core frees it; use driver_set_override() to - * set or clear it. * @config: the configuration ops for this device. * @map: the map ops for this device * @cf_lock: Protects get and set access to configuration layout. @@ -90,7 +87,6 @@ struct vdpa_mgmt_dev; struct vdpa_device { struct device dev; union virtio_map vmap; - const char *driver_override; const struct vdpa_config_ops *config; const struct virtio_map_ops *map; struct rw_semaphore cf_lock; /* Protects get/set config */ From ac4d8bb6e2e13e8684a76ea48d13ebaaaf5c24c4 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Mar 2026 01:59:13 +0100 Subject: [PATCH 65/67] s390/cio: use generic driver_override infrastructure When a driver is probed through __driver_attach(), the bus' match() callback is called without the device lock held, thus accessing the driver_override field without a lock, which can cause a UAF. Fix this by using the driver-core driver_override infrastructure taking care of proper locking internally. Note that calling match() from __driver_attach() without the device lock held is intentional. 
[1] Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1] Reported-by: Gui-Dong Han Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789 Fixes: ebc3d1791503 ("s390/cio: introduce driver_override on the css bus") Reviewed-by: Vineeth Vijayan Link: https://patch.msgid.link/20260324005919.2408620-10-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/s390/cio/cio.h | 5 ----- drivers/s390/cio/css.c | 34 ++++------------------------------ 2 files changed, 4 insertions(+), 35 deletions(-) diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index 08a5e9380e75..bad142c536e1 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -103,11 +103,6 @@ struct subchannel { struct work_struct todo_work; struct schib_config config; u64 dma_mask; - /* - * Driver name to force a match. Do not set directly, because core - * frees it. Use driver_set_override() to set or clear it. - */ - const char *driver_override; } __attribute__ ((aligned(8))); DECLARE_PER_CPU_ALIGNED(struct irb, cio_irb); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 5ab239f38588..e5a0ec6b4e3e 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -159,7 +159,6 @@ static void css_subchannel_release(struct device *dev) sch->config.intparm = 0; cio_commit_config(sch); - kfree(sch->driver_override); kfree(sch); } @@ -323,37 +322,9 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RO(modalias); -static ssize_t driver_override_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct subchannel *sch = to_subchannel(dev); - int ret; - - ret = driver_set_override(dev, &sch->driver_override, buf, count); - if (ret) - return ret; - - return count; -} - -static ssize_t driver_override_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct subchannel *sch = to_subchannel(dev); - ssize_t len; - - 
device_lock(dev); - len = sysfs_emit(buf, "%s\n", sch->driver_override); - device_unlock(dev); - return len; -} -static DEVICE_ATTR_RW(driver_override); - static struct attribute *subch_attrs[] = { &dev_attr_type.attr, &dev_attr_modalias.attr, - &dev_attr_driver_override.attr, NULL, }; @@ -1356,9 +1327,11 @@ static int css_bus_match(struct device *dev, const struct device_driver *drv) struct subchannel *sch = to_subchannel(dev); const struct css_driver *driver = to_cssdriver(drv); struct css_device_id *id; + int ret; /* When driver_override is set, only bind to the matching driver */ - if (sch->driver_override && strcmp(sch->driver_override, drv->name)) + ret = device_match_driver_override(dev, drv); + if (ret == 0) return 0; for (id = driver->subchannel_type; id->match_flags; id++) { @@ -1415,6 +1388,7 @@ static int css_uevent(const struct device *dev, struct kobj_uevent_env *env) static const struct bus_type css_bus_type = { .name = "css", + .driver_override = true, .match = css_bus_match, .probe = css_probe, .remove = css_remove, From 81d6f7c3a70b10ff757ee8b5f8114a190871cf1e Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Mar 2026 01:59:14 +0100 Subject: [PATCH 66/67] s390/ap: use generic driver_override infrastructure When the AP masks are updated via apmask_store() or aqmask_store(), ap_bus_revise_bindings() is called after ap_attr_mutex has been released. This calls __ap_revise_reserved(), which accesses the driver_override field without holding any lock, racing against a concurrent driver_override_store() that may free the old string, resulting in a potential UAF. Fix this by using the driver-core driver_override infrastructure, which protects all accesses with an internal spinlock. Note that unlike most other buses, the AP bus does not check driver_override in its match() callback; the override is checked in ap_device_probe() and __ap_revise_reserved() instead. 
Also note that we do not enable the driver_override feature of struct bus_type, as AP - in contrast to most other buses - passes "" to sysfs_emit() when the driver_override pointer is NULL, thus printing "\n" instead of "(null)\n". Additionally, AP has a custom counter that is modified in the corresponding custom driver_override_store(). Fixes: d38a87d7c064 ("s390/ap: Support driver_override for AP queue devices") Tested-by: Holger Dengler Reviewed-by: Holger Dengler Reviewed-by: Harald Freudenberger Link: https://patch.msgid.link/20260324005919.2408620-11-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/s390/crypto/ap_bus.c | 34 +++++++++++++++++----------------- drivers/s390/crypto/ap_bus.h | 1 - drivers/s390/crypto/ap_queue.c | 24 ++++++------------------ 3 files changed, 23 insertions(+), 36 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index d652df96a507..f24e27add721 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -859,25 +859,24 @@ static int __ap_queue_devices_with_id_unregister(struct device *dev, void *data) static int __ap_revise_reserved(struct device *dev, void *dummy) { - int rc, card, queue, devres, drvres; + int rc, card, queue, devres, drvres, ovrd; if (is_queue_dev(dev)) { struct ap_driver *ap_drv = to_ap_drv(dev->driver); struct ap_queue *aq = to_ap_queue(dev); - struct ap_device *ap_dev = &aq->ap_dev; card = AP_QID_CARD(aq->qid); queue = AP_QID_QUEUE(aq->qid); - if (ap_dev->driver_override) { - if (strcmp(ap_dev->driver_override, - ap_drv->driver.name)) { - pr_debug("reprobing queue=%02x.%04x\n", card, queue); - rc = device_reprobe(dev); - if (rc) { - AP_DBF_WARN("%s reprobing queue=%02x.%04x failed\n", - __func__, card, queue); - } + ovrd = device_match_driver_override(dev, &ap_drv->driver); + if (ovrd > 0) { + /* override set and matches, nothing to do */ + } else if (ovrd == 0) { + pr_debug("reprobing queue=%02x.%04x\n", card, queue); + rc =
device_reprobe(dev); + if (rc) { + AP_DBF_WARN("%s reprobing queue=%02x.%04x failed\n", + __func__, card, queue); } } else { mutex_lock(&ap_attr_mutex); @@ -928,7 +927,7 @@ int ap_owned_by_def_drv(int card, int queue) if (aq) { const struct device_driver *drv = aq->ap_dev.device.driver; const struct ap_driver *ap_drv = to_ap_drv(drv); - bool override = !!aq->ap_dev.driver_override; + bool override = device_has_driver_override(&aq->ap_dev.device); if (override && drv && ap_drv->flags & AP_DRIVER_FLAG_DEFAULT) rc = 1; @@ -977,7 +976,7 @@ static int ap_device_probe(struct device *dev) { struct ap_device *ap_dev = to_ap_dev(dev); struct ap_driver *ap_drv = to_ap_drv(dev->driver); - int card, queue, devres, drvres, rc = -ENODEV; + int card, queue, devres, drvres, rc = -ENODEV, ovrd; if (!get_device(dev)) return rc; @@ -991,10 +990,11 @@ static int ap_device_probe(struct device *dev) */ card = AP_QID_CARD(to_ap_queue(dev)->qid); queue = AP_QID_QUEUE(to_ap_queue(dev)->qid); - if (ap_dev->driver_override) { - if (strcmp(ap_dev->driver_override, - ap_drv->driver.name)) - goto out; + ovrd = device_match_driver_override(dev, &ap_drv->driver); + if (ovrd > 0) { + /* override set and matches, nothing to do */ + } else if (ovrd == 0) { + goto out; } else { mutex_lock(&ap_attr_mutex); devres = test_bit_inv(card, ap_perms.apm) && diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 51e08f27bd75..04ea256ecf91 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -166,7 +166,6 @@ void ap_driver_unregister(struct ap_driver *); struct ap_device { struct device device; int device_type; /* AP device type. 
*/ - const char *driver_override; }; #define to_ap_dev(x) container_of((x), struct ap_device, device) diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 3fe2e41c5c6b..ca9819e6f7e7 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -734,26 +734,14 @@ static ssize_t driver_override_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct ap_queue *aq = to_ap_queue(dev); - struct ap_device *ap_dev = &aq->ap_dev; - int rc; - - device_lock(dev); - if (ap_dev->driver_override) - rc = sysfs_emit(buf, "%s\n", ap_dev->driver_override); - else - rc = sysfs_emit(buf, "\n"); - device_unlock(dev); - - return rc; + guard(spinlock)(&dev->driver_override.lock); + return sysfs_emit(buf, "%s\n", dev->driver_override.name ?: ""); } static ssize_t driver_override_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct ap_queue *aq = to_ap_queue(dev); - struct ap_device *ap_dev = &aq->ap_dev; int rc = -EINVAL; bool old_value; @@ -764,13 +752,13 @@ static ssize_t driver_override_store(struct device *dev, if (ap_apmask_aqmask_in_use) goto out; - old_value = ap_dev->driver_override ? 
true : false; - rc = driver_set_override(dev, &ap_dev->driver_override, buf, count); + old_value = device_has_driver_override(dev); + rc = __device_set_driver_override(dev, buf, count); if (rc) goto out; - if (old_value && !ap_dev->driver_override) + if (old_value && !device_has_driver_override(dev)) --ap_driver_override_ctr; - else if (!old_value && ap_dev->driver_override) + else if (!old_value && device_has_driver_override(dev)) ++ap_driver_override_ctr; rc = count; From 6c8dfb0362732bf1e4829867a2a5239fedc592d0 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Mar 2026 01:59:06 +0100 Subject: [PATCH 67/67] bus: fsl-mc: use generic driver_override infrastructure When a driver is probed through __driver_attach(), the bus' match() callback is called without the device lock held, thus accessing the driver_override field without a lock, which can cause a UAF. Fix this by using the driver-core driver_override infrastructure taking care of proper locking internally. Note that calling match() from __driver_attach() without the device lock held is intentional. 
[1] Tested-by: Ioana Ciornei Acked-by: Ioana Ciornei Acked-by: Christophe Leroy (CS GROUP) Link: https://lore.kernel.org/driver-core/DGRGTIRHA62X.3RY09D9SOK77P@kernel.org/ [1] Reported-by: Gui-Dong Han Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220789 Fixes: 1f86a00c1159 ("bus/fsl-mc: add support for 'driver_override' in the mc-bus") Link: https://patch.msgid.link/20260324005919.2408620-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/bus/fsl-mc/fsl-mc-bus.c | 43 +++++-------------------------- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 4 +-- include/linux/fsl/mc.h | 4 --- 3 files changed, 8 insertions(+), 43 deletions(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index c117745cf206..221146e4860b 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -86,12 +86,16 @@ static int fsl_mc_bus_match(struct device *dev, const struct device_driver *drv) struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); const struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(drv); bool found = false; + int ret; /* When driver_override is set, only bind to the matching driver */ - if (mc_dev->driver_override) { - found = !strcmp(mc_dev->driver_override, mc_drv->driver.name); + ret = device_match_driver_override(dev, drv); + if (ret > 0) { + found = true; goto out; } + if (ret == 0) + goto out; if (!mc_drv->match_id_table) goto out; @@ -210,39 +214,8 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(modalias); -static ssize_t driver_override_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); - int ret; - - if (WARN_ON(dev->bus != &fsl_mc_bus_type)) - return -EINVAL; - - ret = driver_set_override(dev, &mc_dev->driver_override, buf, count); - if (ret) - return ret; - - return count; -} - -static ssize_t driver_override_show(struct device *dev, - struct 
device_attribute *attr, char *buf) -{ - struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); - ssize_t len; - - device_lock(dev); - len = sysfs_emit(buf, "%s\n", mc_dev->driver_override); - device_unlock(dev); - return len; -} -static DEVICE_ATTR_RW(driver_override); - static struct attribute *fsl_mc_dev_attrs[] = { &dev_attr_modalias.attr, - &dev_attr_driver_override.attr, NULL, }; @@ -345,6 +318,7 @@ ATTRIBUTE_GROUPS(fsl_mc_bus); const struct bus_type fsl_mc_bus_type = { .name = "fsl-mc", + .driver_override = true, .match = fsl_mc_bus_match, .uevent = fsl_mc_bus_uevent, .probe = fsl_mc_probe, @@ -910,9 +884,6 @@ static struct notifier_block fsl_mc_nb; */ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev) { - kfree(mc_dev->driver_override); - mc_dev->driver_override = NULL; - /* * The device-specific remove callback will get invoked by device_del() */ diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index 462fae1aa538..b4c3958201b2 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -424,9 +424,7 @@ static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb, if (action == BUS_NOTIFY_ADD_DEVICE && vdev->mc_dev == mc_cont) { - mc_dev->driver_override = kasprintf(GFP_KERNEL, "%s", - vfio_fsl_mc_ops.name); - if (!mc_dev->driver_override) + if (device_set_driver_override(dev, vfio_fsl_mc_ops.name)) dev_warn(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s failed\n", dev_name(&mc_cont->dev)); else diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 897d6211c163..1da63f2d7040 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -178,9 +178,6 @@ struct fsl_mc_obj_desc { * @regions: pointer to array of MMIO region entries * @irqs: pointer to array of pointers to interrupts allocated to this device * @resource: generic resource associated with this MC object device, if any. 
- * @driver_override: driver name to force a match; do not set directly, - * because core frees it; use driver_set_override() to - * set or clear it. * * Generic device object for MC object devices that are "attached" to a * MC bus. @@ -214,7 +211,6 @@ struct fsl_mc_device { struct fsl_mc_device_irq **irqs; struct fsl_mc_resource *resource; struct device_link *consumer_link; - const char *driver_override; }; #define to_fsl_mc_device(_dev) \