From 14f2e2ebf31157a873536a7212502bd955b69647 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Sun, 22 Mar 2026 19:53:34 +0000 Subject: [PATCH 01/17] dax/bus: Use dax_region_put() in alloc_dax_region() error path alloc_dax_region() calls kref_init() on the dax_region early in the function, but the error path for sysfs_create_groups() failure uses kfree() directly to free the dax_region. This bypasses the kref lifecycle. Use dax_region_put() instead to handle kref lifecycle correctly. Suggested-by: Jonathan Cameron Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260322195343.206900-2-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/dax/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index c94c09622516..299134c9b294 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -668,7 +668,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, }; if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { - kfree(dax_region); + dax_region_put(dax_region); return NULL; } From 116be1e112cbcb664887e44b74f27316a5fef861 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Sun, 22 Mar 2026 19:53:35 +0000 Subject: [PATCH 02/17] dax/hmem: Factor HMEM registration into __hmem_register_device() Separate the CXL overlap check from the HMEM registration path and keep the platform-device setup in a dedicated __hmem_register_device(). This makes hmem_register_device() the policy entry point for deciding whether a range should be deferred to CXL, while __hmem_register_device() handles the HMEM registration flow. No functional changes. 
Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260322195343.206900-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/dax/hmem/hmem.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 1cf7c2a0ee1c..a3d45032355c 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -58,21 +58,14 @@ static void release_hmem(void *pdev) platform_device_unregister(pdev); } -static int hmem_register_device(struct device *host, int target_nid, - const struct resource *res) +static int __hmem_register_device(struct device *host, int target_nid, + const struct resource *res) { struct platform_device *pdev; struct memregion_info info; long id; int rc; - if (IS_ENABLED(CONFIG_CXL_REGION) && - region_intersects(res->start, resource_size(res), IORESOURCE_MEM, - IORES_DESC_CXL) != REGION_DISJOINT) { - dev_dbg(host, "deferring range to CXL: %pr\n", res); - return 0; - } - rc = region_intersects_soft_reserve(res->start, resource_size(res)); if (rc != REGION_INTERSECTS) return 0; @@ -123,6 +116,19 @@ static int hmem_register_device(struct device *host, int target_nid, return rc; } +static int hmem_register_device(struct device *host, int target_nid, + const struct resource *res) +{ + if (IS_ENABLED(CONFIG_CXL_REGION) && + region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_CXL) != REGION_DISJOINT) { + dev_dbg(host, "deferring range to CXL: %pr\n", res); + return 0; + } + + return __hmem_register_device(host, target_nid, res); +} + static int dax_hmem_platform_probe(struct platform_device *pdev) { return walk_hmem_resources(&pdev->dev, hmem_register_device); From 7b4bcaadfe00e2447c84378291e854ea87a2a41c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 22 Mar 2026 19:53:36 +0000 Subject: [PATCH 03/17] dax/hmem: Request cxl_acpi 
and cxl_pci before walking Soft Reserved ranges Ensure cxl_acpi has published CXL Window resources before HMEM walks Soft Reserved ranges. Replace MODULE_SOFTDEP("pre: cxl_acpi") with an explicit, synchronous request_module("cxl_acpi"). MODULE_SOFTDEP() only guarantees eventual loading; it does not enforce that the dependency has finished init before the current module runs. This can cause HMEM to start before cxl_acpi has populated the resource tree, breaking detection of overlaps between Soft Reserved and CXL Windows. Also, request cxl_pci before HMEM walks Soft Reserved ranges. Unlike cxl_acpi, cxl_pci attach is asynchronous and creates dependent devices that trigger further module loads. Asynchronous probe flushing (wait_for_device_probe()) is added later in the series in a deferred context before HMEM makes ownership decisions for Soft Reserved ranges. Add an additional explicit Kconfig ordering so that CXL_ACPI and CXL_PCI must be initialized before DEV_DAX_HMEM. This prevents HMEM from consuming Soft Reserved ranges before CXL drivers have had a chance to claim them. 
Signed-off-by: Smita Koralahalli Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Tested-by: Tomasz Wolski Link: https://patch.msgid.link/20260322195343.206900-4-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/dax/Kconfig | 2 ++ drivers/dax/hmem/hmem.c | 17 ++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index d656e4c0eb84..3683bb3f2311 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -48,6 +48,8 @@ config DEV_DAX_CXL tristate "CXL DAX: direct access to CXL RAM regions" depends on CXL_BUS && CXL_REGION && DEV_DAX default CXL_REGION && DEV_DAX + depends on CXL_ACPI >= DEV_DAX_HMEM + depends on CXL_PCI >= DEV_DAX_HMEM help CXL RAM regions are either mapped by platform-firmware and published in the initial system-memory map as "System RAM", mapped diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index a3d45032355c..85e751675f65 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -145,6 +145,16 @@ static __init int dax_hmem_init(void) { int rc; + /* + * Ensure that cxl_acpi and cxl_pci have a chance to kick off + * CXL topology discovery at least once before scanning the + * iomem resource tree for IORES_DESC_CXL resources. 
+ */ + if (IS_ENABLED(CONFIG_DEV_DAX_CXL)) { + request_module("cxl_acpi"); + request_module("cxl_pci"); + } + rc = platform_driver_register(&dax_hmem_platform_driver); if (rc) return rc; @@ -165,13 +175,6 @@ static __exit void dax_hmem_exit(void) module_init(dax_hmem_init); module_exit(dax_hmem_exit); -/* Allow for CXL to define its own dax regions */ -#if IS_ENABLED(CONFIG_CXL_REGION) -#if IS_MODULE(CONFIG_CXL_ACPI) -MODULE_SOFTDEP("pre: cxl_acpi"); -#endif -#endif - MODULE_ALIAS("platform:hmem*"); MODULE_ALIAS("platform:hmem_platform*"); MODULE_DESCRIPTION("HMEM DAX: direct access to 'specific purpose' memory"); From edfcf1e21e79ddd6990a1330597c2eb072330832 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 22 Mar 2026 19:53:37 +0000 Subject: [PATCH 04/17] dax/hmem: Gate Soft Reserved deferral on DEV_DAX_CXL Replace IS_ENABLED(CONFIG_CXL_REGION) with IS_ENABLED(CONFIG_DEV_DAX_CXL) so that HMEM only defers Soft Reserved ranges when CXL DAX support is enabled. This makes the coordination between HMEM and the CXL stack more precise and prevents deferral in unrelated CXL configurations. 
Signed-off-by: Smita Koralahalli Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20260322195343.206900-5-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/dax/hmem/hmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 85e751675f65..ca752db03201 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -119,7 +119,7 @@ static int __hmem_register_device(struct device *host, int target_nid, static int hmem_register_device(struct device *host, int target_nid, const struct resource *res) { - if (IS_ENABLED(CONFIG_CXL_REGION) && + if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && region_intersects(res->start, resource_size(res), IORESOURCE_MEM, IORES_DESC_CXL) != REGION_DISJOINT) { dev_dbg(host, "deferring range to CXL: %pr\n", res); From 39aa1d4be12bf9f685adaa06aa2d997c1c611b16 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 22 Mar 2026 19:53:38 +0000 Subject: [PATCH 05/17] dax/cxl, hmem: Initialize hmem early and defer dax_cxl binding Move hmem/ earlier in the dax Makefile so that hmem_init() runs before dax_cxl. In addition, defer registration of the dax_cxl driver to a workqueue instead of using module_cxl_driver(). This ensures that dax_hmem has an opportunity to initialize and register its deferred callback and make ownership decisions before dax_cxl begins probing and claiming Soft Reserved ranges. Mark the dax_cxl driver as PROBE_PREFER_ASYNCHRONOUS so its probe runs out of line from other synchronous probing avoiding ordering dependencies while coordinating ownership decisions with dax_hmem. 
Signed-off-by: Smita Koralahalli Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Tested-by: Tomasz Wolski Link: https://patch.msgid.link/20260322195343.206900-6-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/dax/Makefile | 3 +-- drivers/dax/cxl.c | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile index 5ed5c39857c8..70e996bf1526 100644 --- a/drivers/dax/Makefile +++ b/drivers/dax/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +obj-y += hmem/ obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o @@ -10,5 +11,3 @@ dax-y += bus.o device_dax-y := device.o dax_pmem-y := pmem.o dax_cxl-y := cxl.o - -obj-y += hmem/ diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c index 13cd94d32ff7..a2136adfa186 100644 --- a/drivers/dax/cxl.c +++ b/drivers/dax/cxl.c @@ -38,10 +38,35 @@ static struct cxl_driver cxl_dax_region_driver = { .id = CXL_DEVICE_DAX_REGION, .drv = { .suppress_bind_attrs = true, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; -module_cxl_driver(cxl_dax_region_driver); +static void cxl_dax_region_driver_register(struct work_struct *work) +{ + cxl_driver_register(&cxl_dax_region_driver); +} + +static DECLARE_WORK(cxl_dax_region_driver_work, cxl_dax_region_driver_register); + +static int __init cxl_dax_region_init(void) +{ + /* + * Need to resolve a race with dax_hmem wanting to drive regions + * instead of CXL + */ + queue_work(system_long_wq, &cxl_dax_region_driver_work); + return 0; +} +module_init(cxl_dax_region_init); + +static void __exit cxl_dax_region_exit(void) +{ + flush_work(&cxl_dax_region_driver_work); + cxl_driver_unregister(&cxl_dax_region_driver); +} +module_exit(cxl_dax_region_exit); + MODULE_ALIAS_CXL(CXL_DEVICE_DAX_REGION); MODULE_DESCRIPTION("CXL DAX: direct access to CXL regions"); MODULE_LICENSE("GPL"); From 
34f80bb969cc1710f336ea1878781780a59fc8e7 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Sun, 22 Mar 2026 19:53:39 +0000 Subject: [PATCH 06/17] dax: Track all dax_region allocations under a global resource tree Introduce a global "DAX Regions" resource root and register each dax_region->res under it via request_resource(). Release the resource on dax_region teardown. By enforcing a single global namespace for dax_region allocations, this ensures only one of dax_hmem or dax_cxl can successfully register a dax_region for a given range. Suggested-by: Dan Williams Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260322195343.206900-7-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/dax/bus.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 299134c9b294..68437c05e21d 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -10,6 +10,7 @@ #include "dax-private.h" #include "bus.h" +static struct resource dax_regions = DEFINE_RES_MEM_NAMED(0, -1, "DAX Regions"); static DEFINE_MUTEX(dax_bus_lock); /* @@ -627,6 +628,7 @@ static void dax_region_unregister(void *region) sysfs_remove_groups(&dax_region->dev->kobj, dax_region_attribute_groups); + release_resource(&dax_region->res); dax_region_put(dax_region); } @@ -635,6 +637,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, unsigned long flags) { struct dax_region *dax_region; + int rc; /* * The DAX core assumes that it can store its private data in @@ -667,14 +670,25 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id, .flags = IORESOURCE_MEM | flags, }; - if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { - dax_region_put(dax_region); - return NULL; + rc = request_resource(&dax_regions, &dax_region->res); + if (rc) { + dev_dbg(parent, 
"dax_region resource conflict for %pR\n", + &dax_region->res); + goto err_res; } + if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) + goto err_sysfs; + if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) return NULL; return dax_region; + +err_sysfs: + release_resource(&dax_region->res); +err_res: + dax_region_put(dax_region); + return NULL; } EXPORT_SYMBOL_GPL(alloc_dax_region); From 8e65f99b525b3f49b87db0db0d0e0fc1a0c53e40 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Sun, 22 Mar 2026 19:53:40 +0000 Subject: [PATCH 07/17] cxl/region: Add helper to check Soft Reserved containment by CXL regions Add a helper to determine whether a given Soft Reserved memory range is fully contained within the committed CXL region. This helper provides a primitive for policy decisions in subsequent patches such as co-ordination with dax_hmem to determine whether CXL has fully claimed ownership of Soft Reserved memory ranges. Signed-off-by: Smita Koralahalli Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Dan Williams Link: https://patch.msgid.link/20260322195343.206900-8-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 30 ++++++++++++++++++++++++++++++ include/cxl/cxl.h | 15 +++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 include/cxl/cxl.h diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 42874948b589..f7b20f60ac5c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -4173,6 +4174,35 @@ static int cxl_region_setup_poison(struct cxl_region *cxlr) return devm_add_action_or_reset(dev, remove_debugfs, dentry); } +static int region_contains_resource(struct device *dev, void *data) +{ + struct resource *res = data; + struct cxl_region *cxlr; + struct cxl_region_params *p; + + if 
(!is_cxl_region(dev)) + return 0; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + + if (p->state != CXL_CONFIG_COMMIT) + return 0; + + if (!p->res) + return 0; + + return resource_contains(p->res, res) ? 1 : 0; +} + +bool cxl_region_contains_resource(struct resource *res) +{ + guard(rwsem_read)(&cxl_rwsem.region); + return bus_for_each_dev(&cxl_bus_type, NULL, res, + region_contains_resource) != 0; +} +EXPORT_SYMBOL_GPL(cxl_region_contains_resource); + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h new file mode 100644 index 000000000000..b12d3d0f6658 --- /dev/null +++ b/include/cxl/cxl.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2026 Advanced Micro Devices, Inc. */ +#ifndef _CXL_H_ +#define _CXL_H_ + +#ifdef CONFIG_CXL_REGION +bool cxl_region_contains_resource(struct resource *res); +#else +static inline bool cxl_region_contains_resource(struct resource *res) +{ + return false; +} +#endif + +#endif /* _CXL_H_ */ From e4de6b910bf3645c224cd873d4e03ce3dd81fbe0 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Sun, 22 Mar 2026 19:53:41 +0000 Subject: [PATCH 08/17] dax/hmem, cxl: Defer and resolve Soft Reserved ownership The current probe time ownership check for Soft Reserved memory based solely on CXL window intersection is insufficient. dax_hmem probing is not always guaranteed to run after CXL enumeration and region assembly, which can lead to incorrect ownership decisions before the CXL stack has finished publishing windows and assembling committed regions. Introduce deferred ownership handling for Soft Reserved ranges that intersect CXL windows. When such a range is encountered during the initial dax_hmem probe, schedule deferred work to wait for the CXL stack to complete enumeration and region assembly before deciding ownership. 
Once the deferred work runs, evaluate each Soft Reserved range individually: if a CXL region fully contains the range, skip it and let dax_cxl bind. Otherwise, register it with dax_hmem. This per-range ownership model avoids the need for CXL region teardown and alloc_dax_region() resource exclusion prevents double claiming. Introduce a boolean flag dax_hmem_initial_probe to live inside device.c so it survives module reload. Ensure dax_cxl defers driver registration until dax_hmem has completed ownership resolution. dax_cxl calls dax_hmem_flush_work() before cxl_driver_register(), which both waits for the deferred work to complete and creates a module symbol dependency that forces dax_hmem.ko to load before dax_cxl. Co-developed-by: Dan Williams Signed-off-by: Smita Koralahalli Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20260322195343.206900-9-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/dax/bus.h | 7 ++++ drivers/dax/cxl.c | 1 + drivers/dax/hmem/device.c | 3 ++ drivers/dax/hmem/hmem.c | 74 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+) diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index cbbf64443098..ebbfe2d6da14 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -49,6 +49,13 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax *dev_dax); bool static_dev_dax(struct dev_dax *dev_dax); +#if IS_ENABLED(CONFIG_DEV_DAX_HMEM) +extern bool dax_hmem_initial_probe; +void dax_hmem_flush_work(void); +#else +static inline void dax_hmem_flush_work(void) { } +#endif + #define MODULE_ALIAS_DAX_DEVICE(type) \ MODULE_ALIAS("dax:t" __stringify(type) "*") #define DAX_DEVICE_MODALIAS_FMT "dax:t%d" diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c index a2136adfa186..3ab39b77843d 100644 --- a/drivers/dax/cxl.c +++ b/drivers/dax/cxl.c @@ -44,6 +44,7 @@ static struct cxl_driver cxl_dax_region_driver = { 
static void cxl_dax_region_driver_register(struct work_struct *work) { + dax_hmem_flush_work(); cxl_driver_register(&cxl_dax_region_driver); } diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 56e3cbd181b5..991a4bf7d969 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,9 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +bool dax_hmem_initial_probe; +EXPORT_SYMBOL_GPL(dax_hmem_initial_probe); + static bool platform_initialized; static DEFINE_MUTEX(hmem_resource_lock); static struct resource hmem_active = { diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index ca752db03201..9ceda6b5cadf 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "../bus.h" static bool region_idle; @@ -58,6 +59,23 @@ static void release_hmem(void *pdev) platform_device_unregister(pdev); } +struct dax_defer_work { + struct platform_device *pdev; + struct work_struct work; +}; + +static void process_defer_work(struct work_struct *w); + +static struct dax_defer_work dax_hmem_work = { + .work = __WORK_INITIALIZER(dax_hmem_work.work, process_defer_work), +}; + +void dax_hmem_flush_work(void) +{ + flush_work(&dax_hmem_work.work); +} +EXPORT_SYMBOL_GPL(dax_hmem_flush_work); + static int __hmem_register_device(struct device *host, int target_nid, const struct resource *res) { @@ -122,6 +140,11 @@ static int hmem_register_device(struct device *host, int target_nid, if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && region_intersects(res->start, resource_size(res), IORESOURCE_MEM, IORES_DESC_CXL) != REGION_DISJOINT) { + if (!dax_hmem_initial_probe) { + dev_dbg(host, "await CXL initial probe: %pr\n", res); + queue_work(system_long_wq, &dax_hmem_work.work); + return 0; + } dev_dbg(host, "deferring range to CXL: %pr\n", res); return 0; } @@ -129,8 +152,54 @@ static int hmem_register_device(struct device *host, int target_nid, return 
__hmem_register_device(host, target_nid, res); } +static int hmem_register_cxl_device(struct device *host, int target_nid, + const struct resource *res) +{ + if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_CXL) == REGION_DISJOINT) + return 0; + + if (cxl_region_contains_resource((struct resource *)res)) { + dev_dbg(host, "CXL claims resource, dropping: %pr\n", res); + return 0; + } + + dev_dbg(host, "CXL did not claim resource, registering: %pr\n", res); + return __hmem_register_device(host, target_nid, res); +} + +static void process_defer_work(struct work_struct *w) +{ + struct dax_defer_work *work = container_of(w, typeof(*work), work); + struct platform_device *pdev; + + if (!work->pdev) + return; + + pdev = work->pdev; + + /* Relies on cxl_acpi and cxl_pci having had a chance to load */ + wait_for_device_probe(); + + guard(device)(&pdev->dev); + if (!pdev->dev.driver) + return; + + if (!dax_hmem_initial_probe) { + dax_hmem_initial_probe = true; + walk_hmem_resources(&pdev->dev, hmem_register_cxl_device); + } +} + static int dax_hmem_platform_probe(struct platform_device *pdev) { + if (work_pending(&dax_hmem_work.work)) + return -EBUSY; + + if (!dax_hmem_work.pdev) + dax_hmem_work.pdev = + to_platform_device(get_device(&pdev->dev)); + return walk_hmem_resources(&pdev->dev, hmem_register_device); } @@ -168,6 +237,11 @@ static __init int dax_hmem_init(void) static __exit void dax_hmem_exit(void) { + if (dax_hmem_work.pdev) { + flush_work(&dax_hmem_work.work); + put_device(&dax_hmem_work.pdev->dev); + } + platform_driver_unregister(&dax_hmem_driver); platform_driver_unregister(&dax_hmem_platform_driver); } From 87805c32e6ad7b5ce2d9f7f47e76081857a4a335 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:13 -0700 Subject: [PATCH 09/17] cxl/region: Fix use-after-free from auto assembly failure The following crash signature results from region destruction while an endpoint decoder is staged, but not fully 
attached. [ dj: Moved bus_find_device( to next line. ] Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-2-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 54 ++++++++++++++++++++++++++++++++++++++- drivers/cxl/cxl.h | 6 +++-- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f7b20f60ac5c..b89442931277 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1064,6 +1064,14 @@ static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr, if (!cxld->region) { cxld->region = cxlr; + + /* + * Now that cxld->region is set the intermediate staging state + * can be cleared. + */ + if (cxld == &cxled->cxld && + cxled->state == CXL_DECODER_STATE_AUTO_STAGED) + cxled->state = CXL_DECODER_STATE_AUTO; get_device(&cxlr->dev); } @@ -1805,6 +1813,7 @@ static int cxl_region_attach_auto(struct cxl_region *cxlr, pos = p->nr_targets; p->targets[pos] = cxled; cxled->pos = pos; + cxled->state = CXL_DECODER_STATE_AUTO_STAGED; p->nr_targets++; return 0; @@ -2154,6 +2163,47 @@ static int cxl_region_attach(struct cxl_region *cxlr, return 0; } +static int cxl_region_by_target(struct device *dev, const void *data) +{ + const struct cxl_endpoint_decoder *cxled = data; + struct cxl_region_params *p; + struct cxl_region *cxlr; + + if (!is_cxl_region(dev)) + return 0; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + return p->targets[cxled->pos] == cxled; +} + +/* + * When an auto-region fails to assemble the decoder may be listed as a target, + * but not fully attached. 
+ */ +static void cxl_cancel_auto_attach(struct cxl_endpoint_decoder *cxled) +{ + struct cxl_region_params *p; + struct cxl_region *cxlr; + int pos = cxled->pos; + + if (cxled->state != CXL_DECODER_STATE_AUTO_STAGED) + return; + + struct device *dev __free(put_device) = + bus_find_device(&cxl_bus_type, NULL, cxled, cxl_region_by_target); + if (!dev) + return; + + cxlr = to_cxl_region(dev); + p = &cxlr->params; + + p->nr_targets--; + cxled->state = CXL_DECODER_STATE_AUTO; + cxled->pos = -1; + p->targets[pos] = NULL; +} + static struct cxl_region * __cxl_decoder_detach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, @@ -2177,8 +2227,10 @@ __cxl_decoder_detach(struct cxl_region *cxlr, cxled = p->targets[pos]; } else { cxlr = cxled->cxld.region; - if (!cxlr) + if (!cxlr) { + cxl_cancel_auto_attach(cxled); return NULL; + } p = &cxlr->params; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 9b947286eb9b..30a31968f266 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -378,12 +378,14 @@ struct cxl_decoder { }; /* - * Track whether this decoder is reserved for region autodiscovery, or - * free for userspace provisioning. + * Track whether this decoder is free for userspace provisioning, reserved for + * region autodiscovery, whether it is started connecting (awaiting other + * peers), or has completed auto assembly. */ enum cxl_decoder_state { CXL_DECODER_STATE_MANUAL, CXL_DECODER_STATE_AUTO, + CXL_DECODER_STATE_AUTO_STAGED, }; /** From 1eaef15b2349087d9ce583b9153970d5cf5c5329 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:14 -0700 Subject: [PATCH 10/17] dax/cxl: Fix HMEM dependencies The expectation is that DEV_DAX_HMEM=y should be disallowed if any of CXL_ACPI, or CXL_PCI are set =m. Also DEV_DAX_CXL=y should be disallowed if DEV_DAX_HMEM=m. Use "$config || !$config" syntax for each dependency. Otherwise, the invalid DEV_DAX_HMEM=m && DEV_DAX_CXL=y configuration is allowed. 
Lastly, dax_hmem depends on the availability of the cxl_region_contains_resource() symbol published by the cxl_core.ko module. So, also prevent DEV_DAX_HMEM from being built-in when the cxl_core module is not built-in. Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-3-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/dax/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 3683bb3f2311..504f7f735ef5 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -32,6 +32,9 @@ config DEV_DAX_HMEM depends on EFI_SOFT_RESERVE select NUMA_KEEP_MEMINFO if NUMA_MEMBLKS default DEV_DAX + depends on CXL_ACPI || !CXL_ACPI + depends on CXL_PCI || !CXL_PCI + depends on CXL_BUS || !CXL_BUS help EFI 2.8 platforms, and others, may advertise 'specific purpose' memory. For example, a high bandwidth memory pool. The @@ -48,8 +51,7 @@ config DEV_DAX_CXL tristate "CXL DAX: direct access to CXL RAM regions" depends on CXL_BUS && CXL_REGION && DEV_DAX default CXL_REGION && DEV_DAX - depends on CXL_ACPI >= DEV_DAX_HMEM - depends on CXL_PCI >= DEV_DAX_HMEM + depends on DEV_DAX_HMEM || !DEV_DAX_HMEM help CXL RAM regions are either mapped by platform-firmware and published in the initial system-memory map as "System RAM", mapped From b6a61d5baf99c012c61ee93f8295185942cd7495 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:15 -0700 Subject: [PATCH 11/17] cxl/region: Limit visibility of cxl_region_contains_resource() The dax_hmem dependency on cxl_region_contains_resource() is a one-off special case. It is not suitable for other use cases. Move the definition to the other CONFIG_CXL_REGION guarded definitions in drivers/cxl/cxl.h and include that by a relative path include. This matches what drivers/dax/cxl.c does for its limited private usage of CXL core symbols. 
Reduce the symbol export visibility from global to just dax_hmem, to further clarify its applicability. Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-4-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 3 +-- drivers/cxl/cxl.h | 5 +++++ drivers/dax/hmem/hmem.c | 2 +- include/cxl/cxl.h | 15 --------------- 4 files changed, 7 insertions(+), 18 deletions(-) delete mode 100644 include/cxl/cxl.h diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index b89442931277..657844cf0379 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include "core.h" @@ -4253,7 +4252,7 @@ bool cxl_region_contains_resource(struct resource *res) return bus_for_each_dev(&cxl_bus_type, NULL, res, region_contains_resource) != 0; } -EXPORT_SYMBOL_GPL(cxl_region_contains_resource); +EXPORT_SYMBOL_FOR_MODULES(cxl_region_contains_resource, "dax_hmem"); static int cxl_region_can_probe(struct cxl_region *cxlr) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 30a31968f266..84ad04a02bde 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -941,6 +941,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_endpoint_decoder *cxled); struct cxl_dax_region *to_cxl_dax_region(struct device *dev); u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); +bool cxl_region_contains_resource(struct resource *res); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -963,6 +964,10 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, { return 0; } +static inline bool cxl_region_contains_resource(struct resource *res) +{ + return false; +} #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); diff --git a/drivers/dax/hmem/hmem.c 
b/drivers/dax/hmem/hmem.c index 9ceda6b5cadf..0051e553c33f 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include "../../cxl/cxl.h" #include "../bus.h" static bool region_idle; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h deleted file mode 100644 index b12d3d0f6658..000000000000 --- a/include/cxl/cxl.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2026 Advanced Micro Devices, Inc. */ -#ifndef _CXL_H_ -#define _CXL_H_ - -#ifdef CONFIG_CXL_REGION -bool cxl_region_contains_resource(struct resource *res); -#else -static inline bool cxl_region_contains_resource(struct resource *res) -{ - return false; -} -#endif - -#endif /* _CXL_H_ */ From 471d88441eb990ef1b64713e6975cb3549b1824b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:16 -0700 Subject: [PATCH 12/17] cxl/region: Constify cxl_region_contains_resource() The call to cxl_region_contains_resource() in hmem_register_cxl_device() need not cast away 'const'. The problem is the usage of the bus_for_each_dev() API which does not mark its @data parameter as 'const'. Switch to bus_find_device() which does take 'const' @data, fixup cxl_region_contains_resource() and its caller. 
Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-5-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 11 ++++++----- drivers/cxl/cxl.h | 4 ++-- drivers/dax/hmem/hmem.c | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 657844cf0379..30787faef352 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -4225,9 +4225,9 @@ static int cxl_region_setup_poison(struct cxl_region *cxlr) return devm_add_action_or_reset(dev, remove_debugfs, dentry); } -static int region_contains_resource(struct device *dev, void *data) +static int region_contains_resource(struct device *dev, const void *data) { - struct resource *res = data; + const struct resource *res = data; struct cxl_region *cxlr; struct cxl_region_params *p; @@ -4246,11 +4246,12 @@ static int region_contains_resource(struct device *dev, void *data) return resource_contains(p->res, res) ? 
1 : 0; } -bool cxl_region_contains_resource(struct resource *res) +bool cxl_region_contains_resource(const struct resource *res) { guard(rwsem_read)(&cxl_rwsem.region); - return bus_for_each_dev(&cxl_bus_type, NULL, res, - region_contains_resource) != 0; + struct device *dev __free(put_device) = bus_find_device( + &cxl_bus_type, NULL, res, region_contains_resource); + return !!dev; } EXPORT_SYMBOL_FOR_MODULES(cxl_region_contains_resource, "dax_hmem"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 84ad04a02bde..340bdc9fcacc 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -941,7 +941,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_endpoint_decoder *cxled); struct cxl_dax_region *to_cxl_dax_region(struct device *dev); u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa); -bool cxl_region_contains_resource(struct resource *res); +bool cxl_region_contains_resource(const struct resource *res); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -964,7 +964,7 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, { return 0; } -static inline bool cxl_region_contains_resource(struct resource *res) +static inline bool cxl_region_contains_resource(const struct resource *res) { return false; } diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 0051e553c33f..b2ab1292fa81 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -159,7 +159,7 @@ static int hmem_register_cxl_device(struct device *host, int target_nid, IORES_DESC_CXL) == REGION_DISJOINT) return 0; - if (cxl_region_contains_resource((struct resource *)res)) { + if (cxl_region_contains_resource(res)) { dev_dbg(host, "CXL claims resource, dropping: %pr\n", res); return 0; } From 3cba30eed56df3af80ae8d4fde9cf4039eace82a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:17 -0700 Subject: [PATCH 13/17] dax/hmem: Reduce visibility of 
dax_cxl coordination symbols No other module or use case should be using dax_hmem_initial_probe or dax_hmem_flush_work(). Limit their use to dax_hmem, and dax_cxl respectively. Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-6-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/dax/hmem/device.c | 2 +- drivers/dax/hmem/hmem.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 991a4bf7d969..675d56276d78 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -9,7 +9,7 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); bool dax_hmem_initial_probe; -EXPORT_SYMBOL_GPL(dax_hmem_initial_probe); +EXPORT_SYMBOL_FOR_MODULES(dax_hmem_initial_probe, "dax_hmem"); static bool platform_initialized; static DEFINE_MUTEX(hmem_resource_lock); diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index b2ab1292fa81..dd3d7f93baee 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -74,7 +74,7 @@ void dax_hmem_flush_work(void) { flush_work(&dax_hmem_work.work); } -EXPORT_SYMBOL_GPL(dax_hmem_flush_work); +EXPORT_SYMBOL_FOR_MODULES(dax_hmem_flush_work, "dax_cxl"); static int __hmem_register_device(struct device *host, int target_nid, const struct resource *res) From f8dc1bde187310e0345beb08df949e0c2a4c86ce Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:18 -0700 Subject: [PATCH 14/17] dax/hmem: Fix singleton confusion between dax_hmem_work and hmem devices dax_hmem (ab)uses a platform device to allow for a module to autoload in the presence of "Soft Reserved" resources. The dax_hmem driver had no dependencies on the "hmem_platform" device being a singleton until the recent "dax_hmem vs dax_cxl" takeover solution. 
Replace the layering violation of dax_hmem_work assuming that there will never be more than one "hmem_platform" device associated with a global work item with a dax_hmem local workqueue that can theoretically support any number of hmem_platform devices. Fixup the reference counting to only pin the device while it is live in the queue. Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-7-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/dax/bus.h | 15 +++++- drivers/dax/hmem/device.c | 28 ++++++---- drivers/dax/hmem/hmem.c | 108 +++++++++++++++++++------------------- 3 files changed, 85 insertions(+), 66 deletions(-) diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index ebbfe2d6da14..7b1a83f1ce1f 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -3,7 +3,9 @@ #ifndef __DAX_BUS_H__ #define __DAX_BUS_H__ #include +#include #include +#include struct dev_dax; struct resource; @@ -49,8 +51,19 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv); void kill_dev_dax(struct dev_dax *dev_dax); bool static_dev_dax(struct dev_dax *dev_dax); +struct hmem_platform_device { + struct platform_device pdev; + struct work_struct work; + bool did_probe; +}; + +static inline struct hmem_platform_device * +to_hmem_platform_device(struct platform_device *pdev) +{ + return container_of(pdev, struct hmem_platform_device, pdev); +} + #if IS_ENABLED(CONFIG_DEV_DAX_HMEM) -extern bool dax_hmem_initial_probe; void dax_hmem_flush_work(void); #else static inline void dax_hmem_flush_work(void) { } diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 675d56276d78..d70359b4307b 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -4,13 +4,11 @@ #include #include #include +#include "../bus.h" static bool nohmem; module_param_named(disable, nohmem, bool, 0444); -bool dax_hmem_initial_probe; 
-EXPORT_SYMBOL_FOR_MODULES(dax_hmem_initial_probe, "dax_hmem"); - static bool platform_initialized; static DEFINE_MUTEX(hmem_resource_lock); static struct resource hmem_active = { @@ -36,9 +34,21 @@ int walk_hmem_resources(struct device *host, walk_hmem_fn fn) } EXPORT_SYMBOL_GPL(walk_hmem_resources); +static void hmem_work(struct work_struct *work) +{ + /* place holder until dax_hmem driver attaches */ +} + +static struct hmem_platform_device hmem_platform = { + .pdev = { + .name = "hmem_platform", + .id = 0, + }, + .work = __WORK_INITIALIZER(hmem_platform.work, hmem_work), +}; + static void __hmem_register_resource(int target_nid, struct resource *res) { - struct platform_device *pdev; struct resource *new; int rc; @@ -54,17 +64,13 @@ static void __hmem_register_resource(int target_nid, struct resource *res) if (platform_initialized) return; - pdev = platform_device_alloc("hmem_platform", 0); - if (!pdev) { + rc = platform_device_register(&hmem_platform.pdev); + if (rc) { pr_err_once("failed to register device-dax hmem_platform device\n"); return; } - rc = platform_device_add(pdev); - if (rc) - platform_device_put(pdev); - else - platform_initialized = true; + platform_initialized = true; } void hmem_register_resource(int target_nid, struct resource *res) diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index dd3d7f93baee..e1dae83dae8d 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -59,20 +59,11 @@ static void release_hmem(void *pdev) platform_device_unregister(pdev); } -struct dax_defer_work { - struct platform_device *pdev; - struct work_struct work; -}; - -static void process_defer_work(struct work_struct *w); - -static struct dax_defer_work dax_hmem_work = { - .work = __WORK_INITIALIZER(dax_hmem_work.work, process_defer_work), -}; +static struct workqueue_struct *dax_hmem_wq; void dax_hmem_flush_work(void) { - flush_work(&dax_hmem_work.work); + flush_workqueue(dax_hmem_wq); } EXPORT_SYMBOL_FOR_MODULES(dax_hmem_flush_work, 
"dax_cxl"); @@ -134,24 +125,6 @@ static int __hmem_register_device(struct device *host, int target_nid, return rc; } -static int hmem_register_device(struct device *host, int target_nid, - const struct resource *res) -{ - if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && - region_intersects(res->start, resource_size(res), IORESOURCE_MEM, - IORES_DESC_CXL) != REGION_DISJOINT) { - if (!dax_hmem_initial_probe) { - dev_dbg(host, "await CXL initial probe: %pr\n", res); - queue_work(system_long_wq, &dax_hmem_work.work); - return 0; - } - dev_dbg(host, "deferring range to CXL: %pr\n", res); - return 0; - } - - return __hmem_register_device(host, target_nid, res); -} - static int hmem_register_cxl_device(struct device *host, int target_nid, const struct resource *res) { @@ -170,35 +143,55 @@ static int hmem_register_cxl_device(struct device *host, int target_nid, static void process_defer_work(struct work_struct *w) { - struct dax_defer_work *work = container_of(w, typeof(*work), work); - struct platform_device *pdev; - - if (!work->pdev) - return; - - pdev = work->pdev; + struct hmem_platform_device *hpdev = container_of(w, typeof(*hpdev), work); + struct device *dev = &hpdev->pdev.dev; /* Relies on cxl_acpi and cxl_pci having had a chance to load */ wait_for_device_probe(); - guard(device)(&pdev->dev); - if (!pdev->dev.driver) - return; + guard(device)(dev); + if (!dev->driver) + goto out; - if (!dax_hmem_initial_probe) { - dax_hmem_initial_probe = true; - walk_hmem_resources(&pdev->dev, hmem_register_cxl_device); + if (!hpdev->did_probe) { + hpdev->did_probe = true; + walk_hmem_resources(dev, hmem_register_cxl_device); } +out: + put_device(dev); +} + +static int hmem_register_device(struct device *host, int target_nid, + const struct resource *res) +{ + struct platform_device *pdev = to_platform_device(host); + struct hmem_platform_device *hpdev = to_hmem_platform_device(pdev); + + if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && + region_intersects(res->start, resource_size(res), 
IORESOURCE_MEM, + IORES_DESC_CXL) != REGION_DISJOINT) { + if (!hpdev->did_probe) { + dev_dbg(host, "await CXL initial probe: %pr\n", res); + hpdev->work.func = process_defer_work; + get_device(host); + if (!queue_work(dax_hmem_wq, &hpdev->work)) + put_device(host); + return 0; + } + dev_dbg(host, "deferring range to CXL: %pr\n", res); + return 0; + } + + return __hmem_register_device(host, target_nid, res); } static int dax_hmem_platform_probe(struct platform_device *pdev) { - if (work_pending(&dax_hmem_work.work)) - return -EBUSY; + struct hmem_platform_device *hpdev = to_hmem_platform_device(pdev); - if (!dax_hmem_work.pdev) - dax_hmem_work.pdev = - to_platform_device(get_device(&pdev->dev)); + /* queue is only flushed on module unload, fail rebind with pending work */ + if (work_pending(&hpdev->work)) + return -EBUSY; return walk_hmem_resources(&pdev->dev, hmem_register_device); } @@ -224,26 +217,33 @@ static __init int dax_hmem_init(void) request_module("cxl_pci"); } + dax_hmem_wq = alloc_ordered_workqueue("dax_hmem_wq", 0); + if (!dax_hmem_wq) + return -ENOMEM; + rc = platform_driver_register(&dax_hmem_platform_driver); if (rc) - return rc; + goto err_platform_driver; rc = platform_driver_register(&dax_hmem_driver); if (rc) - platform_driver_unregister(&dax_hmem_platform_driver); + goto err_driver; + + return 0; + +err_driver: + platform_driver_unregister(&dax_hmem_platform_driver); +err_platform_driver: + destroy_workqueue(dax_hmem_wq); return rc; } static __exit void dax_hmem_exit(void) { - if (dax_hmem_work.pdev) { - flush_work(&dax_hmem_work.work); - put_device(&dax_hmem_work.pdev->dev); - } - platform_driver_unregister(&dax_hmem_driver); platform_driver_unregister(&dax_hmem_platform_driver); + destroy_workqueue(dax_hmem_wq); } module_init(dax_hmem_init); From 059edcc405e46cc10ee65ab2c039aa6bccfbb3a0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:19 -0700 Subject: [PATCH 15/17] dax/hmem: Parent dax_hmem devices For test purposes 
it is useful to be able to determine which "hmem_platform" device is hosting a given sub-device. Register hmem devices underneath "hmem_platform". Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-8-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- drivers/dax/hmem/hmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index e1dae83dae8d..af21f66bf872 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -96,6 +96,7 @@ static int __hmem_register_device(struct device *host, int target_nid, return -ENOMEM; } + pdev->dev.parent = host; pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, From 78b8f1a7a4ab39cecd926d50627db3537e0f2ee9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:20 -0700 Subject: [PATCH 16/17] tools/testing/cxl: Simulate auto-assembly failure Add a cxl_test module option to skip setting up one of the members of the default auto-assembled region. This simulates a device failing between firmware setup and OS boot, or region configuration interrupted by an event like kexec. 
Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20260327052821.440749-9-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- tools/testing/cxl/test/cxl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 81e2aef3627a..7deeb7ff7bdf 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -16,6 +16,7 @@ static int interleave_arithmetic; static bool extended_linear_cache; +static bool fail_autoassemble; #define FAKE_QTG_ID 42 @@ -819,6 +820,12 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) return; } + /* Simulate missing cxl_mem.4 configuration */ + if (hb0 && pdev->id == 4 && cxld->id == 0 && fail_autoassemble) { + default_mock_decoder(cxld); + return; + } + base = window->base_hpa; if (extended_linear_cache) base += mock_auto_region_size; @@ -1620,6 +1627,8 @@ module_param(interleave_arithmetic, int, 0444); MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); module_param(extended_linear_cache, bool, 0444); MODULE_PARM_DESC(extended_linear_cache, "Enable extended linear cache support"); +module_param(fail_autoassemble, bool, 0444); +MODULE_PARM_DESC(fail_autoassemble, "Simulate missing member of an auto-region"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); From 549b5c12ef06441dbde4718f16e23c547f5592d7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Mar 2026 22:28:21 -0700 Subject: [PATCH 17/17] tools/testing/cxl: Test dax_hmem takeover of CXL regions When platform firmware is committed to publishing EFI_CONVENTIONAL_MEMORY in the memory map, but CXL fails to assemble the region, dax_hmem can attempt to attach a dax device to the memory range. 
Take advantage of the new ability to support multiple "hmem_platform" devices to enable regression testing of several scenarios: * CXL correctly assembles a region, check dax_hmem fails to attach dax * CXL fails to assemble a region, check dax_hmem successfully attaches dax * Check that loading the dax_cxl driver loads the dax_hmem driver * Attempt to race cxl_mock_mem async probe vs dax_hmem probe flushing. Check both positive and negative cases. Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Tested-by: Alison Schofield Link: https://patch.msgid.link/20260327052821.440749-10-dan.j.williams@intel.com Signed-off-by: Dave Jiang --- tools/testing/cxl/Kbuild | 7 ++++ tools/testing/cxl/test/Kbuild | 1 + tools/testing/cxl/test/cxl.c | 57 ++++++++++++++++++++++++++++++ tools/testing/cxl/test/hmem_test.c | 47 ++++++++++++++++++++++++ tools/testing/cxl/test/mem.c | 3 ++ tools/testing/cxl/test/mock.c | 50 ++++++++++++++++++++++++++ tools/testing/cxl/test/mock.h | 8 +++++ 7 files changed, 173 insertions(+) create mode 100644 tools/testing/cxl/test/hmem_test.c diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 53d84a6874b7..540425c7cd41 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -11,8 +11,12 @@ ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup ldflags-y += --wrap=hmat_get_extended_linear_cache_size ldflags-y += --wrap=devm_cxl_add_dport_by_dev ldflags-y += --wrap=devm_cxl_switch_port_decoders_setup +ldflags-y += --wrap=walk_hmem_resources +ldflags-y += --wrap=region_intersects +ldflags-y += --wrap=region_intersects_soft_reserve DRIVERS := ../../../drivers +DAX_HMEM_SRC := $(DRIVERS)/dax/hmem CXL_SRC := $(DRIVERS)/cxl CXL_CORE_SRC := $(DRIVERS)/cxl/core ccflags-y := -I$(srctree)/drivers/cxl/ @@ -70,6 +74,9 @@ cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o +obj-m += dax_hmem.o +dax_hmem-y := $(DAX_HMEM_SRC)/hmem.o +
KBUILD_CFLAGS := $(filter-out -Wmissing-prototypes -Wmissing-declarations, $(KBUILD_CFLAGS)) obj-m += test/ diff --git a/tools/testing/cxl/test/Kbuild b/tools/testing/cxl/test/Kbuild index af50972c8b6d..c168e3c998a7 100644 --- a/tools/testing/cxl/test/Kbuild +++ b/tools/testing/cxl/test/Kbuild @@ -7,6 +7,7 @@ obj-m += cxl_mock_mem.o obj-m += cxl_translate.o cxl_test-y := cxl.o +cxl_test-y += hmem_test.o cxl_mock-y := mock.o cxl_mock_mem-y := mem.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 7deeb7ff7bdf..9a9f52090c1d 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1121,6 +1121,53 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port) cxl_endpoint_get_perf_coordinates(port, ep_c); } +/* + * Simulate that the first half of mock CXL Window 0 is "Soft Reserve" capacity + */ +static int mock_walk_hmem_resources(struct device *host, walk_hmem_fn fn) +{ + struct acpi_cedt_cfmws *cfmws = mock_cfmws[0]; + struct resource window = + DEFINE_RES_MEM(cfmws->base_hpa, cfmws->window_size / 2); + + dev_dbg(host, "walk cxl_test resource: %pr\n", &window); + return fn(host, 0, &window); +} + +/* + * This should only be called by the dax_hmem case, treat mismatches (negative + * result) as "fallback to base region_intersects()". Simulate that the first + * half of mock CXL Window 0 is IORES_DESC_CXL capacity. 
+ */ +static int mock_region_intersects(resource_size_t start, size_t size, + unsigned long flags, unsigned long desc) +{ + struct resource res = DEFINE_RES_MEM(start, size); + struct acpi_cedt_cfmws *cfmws = mock_cfmws[0]; + struct resource window = + DEFINE_RES_MEM(cfmws->base_hpa, cfmws->window_size / 2); + + if (resource_overlaps(&res, &window)) + return REGION_INTERSECTS; + pr_debug("warning: no cxl_test CXL intersection for %pr\n", &res); + return -1; +} + + +static int +mock_region_intersects_soft_reserve(resource_size_t start, size_t size) +{ + struct resource res = DEFINE_RES_MEM(start, size); + struct acpi_cedt_cfmws *cfmws = mock_cfmws[0]; + struct resource window = + DEFINE_RES_MEM(cfmws->base_hpa, cfmws->window_size / 2); + + if (resource_overlaps(&res, &window)) + return REGION_INTERSECTS; + pr_debug("warning: no cxl_test soft reserve intersection for %pr\n", &res); + return -1; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -1136,6 +1183,9 @@ static struct cxl_mock_ops cxl_mock_ops = { .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev, .hmat_get_extended_linear_cache_size = mock_hmat_get_extended_linear_cache_size, + .walk_hmem_resources = mock_walk_hmem_resources, + .region_intersects = mock_region_intersects, + .region_intersects_soft_reserve = mock_region_intersects_soft_reserve, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; @@ -1561,8 +1611,14 @@ static __init int cxl_test_init(void) if (rc) goto err_root; + rc = hmem_test_init(); + if (rc) + goto err_mem; + return 0; +err_mem: + cxl_mem_exit(); err_root: platform_device_put(cxl_acpi); err_rch: @@ -1600,6 +1656,7 @@ static __exit void cxl_test_exit(void) { int i; + hmem_test_exit(); cxl_mem_exit(); platform_device_unregister(cxl_acpi); cxl_rch_topo_exit(); diff --git a/tools/testing/cxl/test/hmem_test.c b/tools/testing/cxl/test/hmem_test.c new file mode 100644 index 000000000000..3a1a089e1721 --- /dev/null +++ 
b/tools/testing/cxl/test/hmem_test.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2026 Intel Corporation */ +#include +#include +#include "../../../drivers/dax/bus.h" + +static bool hmem_test; + +static void hmem_test_work(struct work_struct *work) +{ +} + +static void hmem_test_release(struct device *dev) +{ + struct hmem_platform_device *hpdev = + container_of(dev, typeof(*hpdev), pdev.dev); + + memset(hpdev, 0, sizeof(*hpdev)); +} + +static struct hmem_platform_device hmem_test_device = { + .pdev = { + .name = "hmem_platform", + .id = 1, + .dev = { + .release = hmem_test_release, + }, + }, + .work = __WORK_INITIALIZER(hmem_test_device.work, hmem_test_work), +}; + +int hmem_test_init(void) +{ + if (!hmem_test) + return 0; + + return platform_device_register(&hmem_test_device.pdev); +} + +void hmem_test_exit(void) +{ + if (hmem_test) + platform_device_unregister(&hmem_test_device.pdev); +} + +module_param(hmem_test, bool, 0444); +MODULE_PARM_DESC(hmem_test, "Enable/disable the dax_hmem test platform device"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index cb87e8c0e63c..cc847e9aeceb 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1695,6 +1695,9 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) struct cxl_dpa_info range_info = { 0 }; int rc; + /* Increase async probe race window */ + usleep_range(500*1000, 1000*1000); + mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); if (!mdata) return -ENOMEM; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index b8fcb50c1027..6454b868b122 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -251,6 +251,56 @@ struct cxl_dport *__wrap_devm_cxl_add_dport_by_dev(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_dport_by_dev, "CXL"); +int __wrap_region_intersects(resource_size_t start, size_t size, + unsigned long flags, unsigned long desc) +{ + int rc = 
-1; + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + rc = ops->region_intersects(start, size, flags, desc); + if (rc < 0) + rc = region_intersects(start, size, flags, desc); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_region_intersects); + +int __wrap_region_intersects_soft_reserve(resource_size_t start, size_t size) +{ + int rc = -1; + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops) + rc = ops->region_intersects_soft_reserve(start, size); + if (rc < 0) + rc = region_intersects_soft_reserve(start, size); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_region_intersects_soft_reserve); + +int __wrap_walk_hmem_resources(struct device *host, walk_hmem_fn fn) +{ + int index, rc = 0; + bool is_mock = strcmp(dev_name(host), "hmem_platform.1") == 0; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (is_mock) { + if (ops) + rc = ops->walk_hmem_resources(host, fn); + } else { + rc = walk_hmem_resources(host, fn); + } + put_cxl_mock_ops(index); + return rc; +} +EXPORT_SYMBOL_GPL(__wrap_walk_hmem_resources); + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("cxl_test: emulation module"); MODULE_IMPORT_NS("ACPI"); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 2684b89c8aa2..4f57dc80ae7d 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -2,6 +2,7 @@ #include #include +#include #include struct cxl_mock_ops { @@ -27,8 +28,15 @@ struct cxl_mock_ops { int (*hmat_get_extended_linear_cache_size)(struct resource *backing_res, int nid, resource_size_t *cache_size); + int (*walk_hmem_resources)(struct device *host, walk_hmem_fn fn); + int (*region_intersects)(resource_size_t start, size_t size, + unsigned long flags, unsigned long desc); + int (*region_intersects_soft_reserve)(resource_size_t start, + size_t size); }; +int hmem_test_init(void); +void hmem_test_exit(void); void 
register_cxl_mock_ops(struct cxl_mock_ops *ops); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops); struct cxl_mock_ops *get_cxl_mock_ops(int *index);