From d9e9aa3e971b37c6d6dfd15ad8dc65537a925725 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Tue, 15 Jul 2025 03:25:03 +0530 Subject: [PATCH 1/9] drm/xe: Don't fail probe on unsupported mailbox command If the device is running older pcode firmware, it is possible that newer mailbox commands are not supported by it. The sysfs attributes aren't useful in that case, but we shouldn't fail driver probe because of it. As of now, it is unknown if we can distinguish unsupported commands before attempting them. But until we figure out a way to do that, fix the regressions. v2: Add debug message (Lucas) Fixes: cdc36b66cd41 ("drm/xe: Expose fan control and voltage regulator version") Signed-off-by: Raag Jadav Tested-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250714215503.2897748-1-raag.jadav@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit ed5461daa150b037e36b8202381da1ef85d6b16b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_sysfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index e5fd0cd537bc..bd9015761aa0 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -160,8 +160,13 @@ static int late_bind_create_files(struct device *dev) ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), &cap, NULL); - if (ret) + if (ret) { + if (ret == -ENXIO) { + drm_dbg(&xe->drm, "Late binding not supported by firmware\n"); + ret = 0; + } goto out; + } if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); From 6aaceed7fe1a400082ec5990884b11ef7266a605 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 15 Jul 2025 11:14:22 -0700 Subject: [PATCH 2/9] drm/xe/oa: Fix static checker warning about null gt There is a static checker warning that gt returned by xe_device_get_gt can be NULL and that is being dereferenced. Use xe_root_mmio_gt instead, which is equivalent and cannot return a NULL gt 0. Fixes: 10d42ef34bce ("drm/xe/oa: Assign hwe for OAM_SAG") Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://lore.kernel.org/r/20250715181422.2807624-1-ashutosh.dixit@intel.com (cherry picked from commit 308dc9b27874d0e8a0258869b9e681b0fdd2e579) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index d991fbd90f20..5729e7d3e335 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1941,7 +1941,7 @@ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) /* If not provided, OA unit defaults to OA unit 0 as per uapi */ if (!param->oa_unit) - param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; + param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { From 2bd986021c297ba675e831c3164bf9bdbbca3bc3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Jul 2025 17:59:44 -0500 Subject: [PATCH 3/9] drm/xe: Fix a NULL vs IS_ERR() bug in xe_i2c_register_adapter() The fwnode_create_software_node() function returns error pointers. It never returns NULL. Update the checks to match. Fixes: f0e53aadd702 ("drm/xe: Support for I2C attached MCUs") Signed-off-by: Dan Carpenter Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/65825d00-81ab-4665-af51-4fff6786a250@sabinyo.mountain Signed-off-by: Rodrigo Vivi (cherry picked from commit 2f264d58cc805a3cefc6b98097f90fbc388136ef) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index db9c0340be5c..1f19718db559 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -96,8 +96,8 @@ static int xe_i2c_register_adapter(struct xe_i2c *i2c) int ret; fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); - if (!fwnode) - return -ENOMEM; + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); /* * Not using platform_device_register_full() here because we don't have From dc94168eaa6f6f2476c4e1a894bd8d031df6226d Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 21 Jul 2025 17:45:20 -0400 Subject: [PATCH 4/9] drm/xe/uc: Fix missing unwind goto Fix missing unwind goto on error handling. Fixes: b2c4ac219fa4 ("drm/xe/uc: Disable GuC communication on hardware initialization error") Signed-off-by: Zhanjun Dong Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250721214520.954014-1-zhanjun.dong@intel.com (cherry picked from commit 176f44a5ec0b074aaf44852db77d0c183c36696d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3e0c3af235f2..465bda355443 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -164,7 +164,7 @@ static int vf_uc_load_hw(struct xe_uc *uc) err = xe_guc_opt_in_features_enable(&uc->guc); if (err) - return err; + goto err_out; err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) From cccb918e0231fefba059f049acced18760242136 Mon Sep 17 00:00:00 2001 From: Lukasz Laguna Date: Thu, 17 Jul 2025 17:54:20 +0200 Subject: [PATCH 5/9] drm/xe/vf: Don't register I2C devices if VF VF drivers can't access I2C devices, so skip their registration when running as VF. Signed-off-by: Lukasz Laguna Fixes: f0e53aadd702 ("drm/xe: Support for I2C attached MCUs") Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250717155420.25298-1-lukasz.laguna@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 9a220e065914b67b55d3d0ab91c3e215742fdd73) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 1f19718db559..bc7dc2099470 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -283,6 +283,9 @@ int xe_i2c_probe(struct xe_device *xe) if (xe->info.platform != XE_BATTLEMAGE) return 0; + if (IS_SRIOV_VF(xe)) + return 0; + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) return 0; From a2e1407eb8405e59c56b2325d910a73fd917eb3e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 23 Jul 2025 19:56:39 +0200 Subject: [PATCH 6/9] drm/xe/guc: Clear whole g2h_fence during initialization The struct g2h_fence must be explicitly initializated using the g2h_fence_init() function to avoid trash values in its members, but we missed to update this helper function with the new member. To fix that and avoid any future mistakes, memset the whole struct first, then update remaining non-zero members. Fixes: 94de94d24ea8 ("drm/xe/guc: Cancel ongoing H2G requests when stopping CT") Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Cc: Lukasz Laguna Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250723175639.206875-1-michal.wajdeczko@intel.com (cherry picked from commit 159afd92bae8153bdd8d8b34aea0d463fe19c978) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index b6acccfcd351..3f4e6a46ff16 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -95,12 +95,8 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { + memset(g2h_fence, 0, sizeof(*g2h_fence)); g2h_fence->response_buffer = response_buffer; - g2h_fence->response_data = 0; - g2h_fence->response_len = 0; - g2h_fence->fail = false; - g2h_fence->retry = false; - g2h_fence->done = false; g2h_fence->seqno = ~0x0; } From 4846856c3a4afa882b6d1b842ed2fad6f3781f4d Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 24 Jul 2025 19:38:55 +0000 Subject: [PATCH 7/9] drm/xe/hw_engine_group: Avoid call kfree() for drmm_kzalloc() Memory allocated with drmm_kzalloc() should not be freed using kfree(), as it is managed by the DRM subsystem. The memory will be automatically freed when the associated drm_device is released. These 3 group pointers are allocated using drmm_kzalloc() in hw_engine_group_alloc(), so they don't require manual deallocation. Fixes: 67979060740f ("drm/xe/hw_engine_group: Fix potential leak") Cc: Michal Wajdeczko Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20250724193854.1124510-2-shuicheng.lin@intel.com (cherry picked from commit f98de826b418885a21ece67f0f5b921ae759b7bf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 28 ++++++------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 87a6dcb1b4b5..c926f840c87b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -75,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) enum xe_hw_engine_id id; struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; struct xe_device *xe = gt_to_xe(gt); - int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = hw_engine_group_alloc(xe); - if (IS_ERR(group_bcs)) { - err = PTR_ERR(group_bcs); - goto err_group_bcs; - } + if (IS_ERR(group_bcs)) + return PTR_ERR(group_bcs); group_vcs_vecs = hw_engine_group_alloc(xe); - if (IS_ERR(group_vcs_vecs)) { - err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; - } + if (IS_ERR(group_vcs_vecs)) + return PTR_ERR(group_vcs_vecs); for_each_hw_engine(hwe, gt, id) { switch (hwe->class) { @@ -116,15 +109,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) } return 0; - -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: - kfree(group_bcs); -err_group_rcs_ccs: - kfree(group_rcs_ccs); - - return err; } /** From 942ac8da6388c25fe62b2792c78715e0ea6e649b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 22 Jul 2025 16:10:54 +0200 Subject: [PATCH 8/9] drm/xe/configfs: Fix pci_dev reference leak We are using pci_get_domain_bus_and_slot() function to verify if the given config directory name matches any existing PCI device, but we missed to call matching pci_dev_put() to release reference. While around, also change error code in case of no device match, to make it more specific than generic formatting error. Fixes: 16280ded45fb ("drm/xe: Add configfs to enable survivability mode") Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250722141059.30707-2-michal.wajdeczko@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 0bdd05c2a82bbf2419415d012fd4f5faeca7f1af) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_configfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 8ec1ff1e4e80..e9b46a2d0019 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -267,7 +267,8 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); if (!pdev) - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENODEV); + pci_dev_put(pdev); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) From f62408efc8669b82541295a4611494c8c8c52684 Mon Sep 17 00:00:00 2001 From: Lukasz Laguna Date: Tue, 29 Jul 2025 14:34:37 +0200 Subject: [PATCH 9/9] drm/xe/vf: Disable CSC support on VF CSC is not accessible by VF drivers, so disable its support flag on VF to prevent further initialization attempts. Fixes: e02cea83d32d ("drm/xe/gsc: add Battlemage support") Signed-off-by: Lukasz Laguna Cc: Alexander Usyskin Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20250729123437.5933-1-lukasz.laguna@intel.com (cherry picked from commit 552dbba1caaf0cb40ce961806d757615e26ec668) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6dc84e4ed281..5bd2f7d7b4ea 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -681,6 +681,7 @@ static void sriov_update_device_info(struct xe_device *xe) /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1;