From 8a3514d348de87a9d5e2ac00fbac4faae0b97996 Mon Sep 17 00:00:00 2001 From: Sanjeev Yadav Date: Fri, 23 May 2025 13:14:01 -0700 Subject: [PATCH 001/497] scsi: core: ufs: Fix a hang in the error handler ufshcd_err_handling_prepare() calls ufshcd_rpm_get_sync(). The latter function can only succeed if UFSHCD_EH_IN_PROGRESS is not set because resuming involves submitting a SCSI command and ufshcd_queuecommand() returns SCSI_MLQUEUE_HOST_BUSY if UFSHCD_EH_IN_PROGRESS is set. Fix this hang by setting UFSHCD_EH_IN_PROGRESS after ufshcd_rpm_get_sync() has been called instead of before. Backtrace: __switch_to+0x174/0x338 __schedule+0x600/0x9e4 schedule+0x7c/0xe8 schedule_timeout+0xa4/0x1c8 io_schedule_timeout+0x48/0x70 wait_for_common_io+0xa8/0x160 //waiting on START_STOP wait_for_completion_io_timeout+0x10/0x20 blk_execute_rq+0xe4/0x1e4 scsi_execute_cmd+0x108/0x244 ufshcd_set_dev_pwr_mode+0xe8/0x250 __ufshcd_wl_resume+0x94/0x354 ufshcd_wl_runtime_resume+0x3c/0x174 scsi_runtime_resume+0x64/0xa4 rpm_resume+0x15c/0xa1c __pm_runtime_resume+0x4c/0x90 // Runtime resume ongoing ufshcd_err_handler+0x1a0/0xd08 process_one_work+0x174/0x808 worker_thread+0x15c/0x490 kthread+0xf4/0x1ec ret_from_fork+0x10/0x20 Signed-off-by: Sanjeev Yadav [ bvanassche: rewrote patch description ] Fixes: 62694735ca95 ("[SCSI] ufs: Add runtime PM support for UFS host controller driver") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20250523201409.1676055-1-bvanassche@acm.org Reviewed-by: Peter Wang Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index a7513f256057..a1d7c8660c1b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6623,9 +6623,14 @@ static void ufshcd_err_handler(struct work_struct *work) up(&hba->host_sem); return; } + spin_unlock_irqrestore(hba->host->host_lock, flags); + + ufshcd_err_handling_prepare(hba); + + spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_set_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_err_handling_prepare(hba); + /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba, false); spin_lock_irqsave(hba->host->host_lock, flags); From b55eb6eb2a7427428c59b293a0900131fc849595 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Wed, 4 Jun 2025 15:03:42 +0000 Subject: [PATCH 002/497] pidfs: never refuse ppid == 0 in PIDFD_GET_INFO In systemd we spotted an issue after switching to ioctl(PIDFD_GET_INFO) for obtaining pid number the pidfd refers to, that for processes with a parent from outer pidns PIDFD_GET_INFO unexpectedly yields -ESRCH [1]. It turned out that there's an arbitrary check blocking this, which is not really sensible given getppid() happily returns 0 for such processes. Just drop the spurious check and userspace ought to handle ppid == 0 properly everywhere. [1] https://github.com/systemd/systemd/issues/37715 Fixes: cdda1f26e74b ("pidfd: add ioctl to retrieve pid info") Signed-off-by: Mike Yuan Link: https://lore.kernel.org/20250604150238.42664-1-me@yhndnzj.com Cc: Christian Brauner Cc: Luca Boccassi Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- fs/pidfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index c1f0a067be40..69919be1c9d8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -366,7 +366,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; - if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + if (kinfo.pid == 0 || kinfo.tgid == 0) return -ESRCH; copy_out: From 714d02b41939d2720379e11ef25227aec4e5bec9 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 5 Jun 2025 12:15:30 +0200 Subject: [PATCH 003/497] ovl: fix regression caused by lookup helpers API changes The lookup helpers API was changed by merge of vfs-6.16-rc1.async.dir to pass a non-const qstr pointer argument to lookup_one*() helpers. All of the callers of this API were changed to pass a pointer to temp copy of qstr, except overlays that was passing a const pointer to dentry->d_name that was changed to pass a non-const copy instead when doing a lookup in lower layer which is not the fs of said dentry. This wrong use of the API caused a regression in fstest overlay/012. Fix the regression by making a non-const copy of dentry->d_name prior to calling the lookup API, but the API should be fixed to not allow this class of bugs. Cc: NeilBrown Fixes: 5741909697a3 ("VFS: improve interface for lookup_one functions") Fixes: 390e34bc1490 ("VFS: change lookup_one_common and lookup_noperm_common to take a qstr") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250605101530.2336320-1-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/overlayfs/namei.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index bf722daf19a9..f75c49ceb460 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1371,7 +1371,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, bool ovl_lower_positive(struct dentry *dentry) { struct ovl_entry *poe = OVL_E(dentry->d_parent); - struct qstr *name = &dentry->d_name; + const struct qstr *name = &dentry->d_name; const struct cred *old_cred; unsigned int i; bool positive = false; @@ -1394,9 +1394,15 @@ bool ovl_lower_positive(struct dentry *dentry) struct dentry *this; struct ovl_path *parentpath = &ovl_lowerstack(poe)[i]; + /* + * We need to make a non-const copy of dentry->d_name, + * because lookup_one_positive_unlocked() will hash name + * with parentpath base, which is on another (lower fs). + */ this = lookup_one_positive_unlocked( mnt_idmap(parentpath->layer->mnt), - name, parentpath->dentry); + &QSTR_LEN(name->name, name->len), + parentpath->dentry); if (IS_ERR(this)) { switch (PTR_ERR(this)) { case -ENOENT: From 4f6fc782128355931527cefe3eb45338abd8ab39 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Jun 2025 12:31:45 +0200 Subject: [PATCH 004/497] perf: Fix sample vs do_exit() Baisheng Gao reported an ARM64 crash, which Mark decoded as being a synchronous external abort -- most likely due to trying to access MMIO in bad ways. The crash further shows perf trying to do a user stack sample while in exit_mmap()'s tlb_finish_mmu() -- i.e. while tearing down the address space it is trying to access. It turns out that we stop perf after we tear down the userspace mm; a receipie for disaster, since perf likes to access userspace for various reasons. Flip this order by moving up where we stop perf in do_exit(). Additionally, harden PERF_SAMPLE_CALLCHAIN and PERF_SAMPLE_STACK_USER to abort when the current task does not have an mm (exit_mm() makes sure to set current->mm = NULL; before commencing with the actual teardown). Such that CPU wide events don't trip on this same problem. Fixes: c5ebcedb566e ("perf: Add ability to attach user stack dump to sample") Reported-by: Baisheng Gao Suggested-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250605110815.GQ39944@noisy.programming.kicks-ass.net --- kernel/events/core.c | 7 +++++++ kernel/exit.c | 17 +++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f34c99f8ce8f..26dec1d8a5ab 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7439,6 +7439,10 @@ perf_sample_ustack_size(u16 stack_size, u16 header_size, if (!regs) return 0; + /* No mm, no stack, no dump. */ + if (!current->mm) + return 0; + /* * Check if we fit in with the requested stack size into the: * - TASK_SIZE @@ -8150,6 +8154,9 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) const u32 max_stack = event->attr.sample_max_stack; struct perf_callchain_entry *callchain; + if (!current->mm) + user = false; + if (!kernel && !user) return &__empty_callchain; diff --git a/kernel/exit.c b/kernel/exit.c index 38645039dd8f..1883150c9422 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -944,6 +944,15 @@ void __noreturn do_exit(long code) taskstats_exit(tsk, group_dead); trace_sched_process_exit(tsk, group_dead); + /* + * Since sampling can touch ->mm, make sure to stop everything before we + * tear it down. + * + * Also flushes inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + */ + perf_event_exit_task(tsk); + exit_mm(); if (group_dead) @@ -959,14 +968,6 @@ void __noreturn do_exit(long code) exit_task_work(tsk); exit_thread(tsk); - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. - * - * because of cgroup mode, must be called before cgroup_exit() - */ - perf_event_exit_task(tsk); - sched_autogroup_exit_task(tsk); cgroup_exit(tsk); From 61988e36dc5457cdff7ae7927e8d9ad1419ee998 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Jun 2025 12:37:11 +0200 Subject: [PATCH 005/497] perf: Fix cgroup state vs ERROR While chasing down a missing perf_cgroup_event_disable() elsewhere, Leo Yan found that both perf_put_aux_event() and perf_remove_sibling_event() were also missing one. Specifically, the rule is that events that switch to OFF,ERROR need to call perf_cgroup_event_disable(). Unify the disable paths to ensure this. Fixes: ab43762ef010 ("perf: Allow normal events to output AUX data") Fixes: 9f0c4fa111dc ("perf/core: Add a new PERF_EV_CAP_SIBLING event capability") Reported-by: Leo Yan Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250605123343.GD35970@noisy.programming.kicks-ass.net --- kernel/events/core.c | 51 ++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 26dec1d8a5ab..1cc98b9b3c0b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2149,8 +2149,9 @@ perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event) } static void put_event(struct perf_event *event); -static void event_sched_out(struct perf_event *event, - struct perf_event_context *ctx); +static void __event_disable(struct perf_event *event, + struct perf_event_context *ctx, + enum perf_event_state state); static void perf_put_aux_event(struct perf_event *event) { @@ -2183,8 +2184,7 @@ static void perf_put_aux_event(struct perf_event *event) * state so that we don't try to schedule it again. Note * that perf_event_enable() will clear the ERROR status. */ - event_sched_out(iter, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); + __event_disable(iter, ctx, PERF_EVENT_STATE_ERROR); } } @@ -2242,18 +2242,6 @@ static inline struct list_head *get_event_list(struct perf_event *event) &event->pmu_ctx->flexible_active; } -/* - * Events that have PERF_EV_CAP_SIBLING require being part of a group and - * cannot exist on their own, schedule them out and move them into the ERROR - * state. Also see _perf_event_enable(), it will not be able to recover - * this ERROR state. - */ -static inline void perf_remove_sibling_event(struct perf_event *event) -{ - event_sched_out(event, event->ctx); - perf_event_set_state(event, PERF_EVENT_STATE_ERROR); -} - static void perf_group_detach(struct perf_event *event) { struct perf_event *leader = event->group_leader; @@ -2289,8 +2277,15 @@ static void perf_group_detach(struct perf_event *event) */ list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) { + /* + * Events that have PERF_EV_CAP_SIBLING require being part of + * a group and cannot exist on their own, schedule them out + * and move them into the ERROR state. Also see + * _perf_event_enable(), it will not be able to recover this + * ERROR state. + */ if (sibling->event_caps & PERF_EV_CAP_SIBLING) - perf_remove_sibling_event(sibling); + __event_disable(sibling, ctx, PERF_EVENT_STATE_ERROR); sibling->group_leader = sibling; list_del_init(&sibling->sibling_list); @@ -2562,6 +2557,15 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla event_function_call(event, __perf_remove_from_context, (void *)flags); } +static void __event_disable(struct perf_event *event, + struct perf_event_context *ctx, + enum perf_event_state state) +{ + event_sched_out(event, ctx); + perf_cgroup_event_disable(event, ctx); + perf_event_set_state(event, state); +} + /* * Cross CPU call to disable a performance event */ @@ -2576,13 +2580,18 @@ static void __perf_event_disable(struct perf_event *event, perf_pmu_disable(event->pmu_ctx->pmu); ctx_time_update_event(ctx, event); + /* + * When disabling a group leader, the whole group becomes ineligible + * to run, so schedule out the full group. + */ if (event == event->group_leader) group_sched_out(event, ctx); - else - event_sched_out(event, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_OFF); - perf_cgroup_event_disable(event, ctx); + /* + * But only mark the leader OFF; the siblings will remain + * INACTIVE. + */ + __event_disable(event, ctx, PERF_EVENT_STATE_OFF); perf_pmu_enable(event->pmu_ctx->pmu); } From 3b7a34aebbdf2a4b7295205bf0c654294283ec82 Mon Sep 17 00:00:00 2001 From: Yeoreum Yun Date: Mon, 2 Jun 2025 19:40:49 +0100 Subject: [PATCH 006/497] perf: Fix dangling cgroup pointer in cpuctx Commit a3c3c6667("perf/core: Fix child_total_time_enabled accounting bug at task exit") moves the event->state update to before list_del_event(). This makes the event->state test in list_del_event() always false; never calling perf_cgroup_event_disable(). As a result, cpuctx->cgrp won't be cleared properly; causing havoc. Fixes: a3c3c6667("perf/core: Fix child_total_time_enabled accounting bug at task exit") Signed-off-by: Yeoreum Yun Signed-off-by: Peter Zijlstra (Intel) Tested-by: David Wang <00107082@163.com> Link: https://lore.kernel.org/all/aD2TspKH%2F7yvfYoO@e129823.arm.com/ --- kernel/events/core.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1cc98b9b3c0b..d78608323916 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2120,18 +2120,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader == event) del_event_from_groups(event, ctx); - /* - * If event was in error state, then keep it - * that way, otherwise bogus counts will be - * returned on read(). The only way to get out - * of error state is by explicit re-enabling - * of the event - */ - if (event->state > PERF_EVENT_STATE_OFF) { - perf_cgroup_event_disable(event, ctx); - perf_event_set_state(event, PERF_EVENT_STATE_OFF); - } - ctx->generation++; event->pmu_ctx->nr_events--; } @@ -2488,11 +2476,14 @@ __perf_remove_from_context(struct perf_event *event, state = PERF_EVENT_STATE_EXIT; if (flags & DETACH_REVOKE) state = PERF_EVENT_STATE_REVOKED; - if (flags & DETACH_DEAD) { - event->pending_disable = 1; + if (flags & DETACH_DEAD) state = PERF_EVENT_STATE_DEAD; - } + event_sched_out(event, ctx); + + if (event->state > PERF_EVENT_STATE_OFF) + perf_cgroup_event_disable(event, ctx); + perf_event_set_state(event, min(event->state, state)); if (flags & DETACH_GROUP) From 3172fb986666dfb71bf483b6d3539e1e587fa197 Mon Sep 17 00:00:00 2001 From: Luo Gengkun Date: Wed, 4 Jun 2025 03:39:24 +0000 Subject: [PATCH 007/497] perf/core: Fix WARN in perf_cgroup_switch() There may be concurrency between perf_cgroup_switch and perf_cgroup_event_disable. Consider the following scenario: after a new perf cgroup event is created on CPU0, the new event may not trigger a reprogramming, causing ctx->is_active to be 0. In this case, when CPU1 disables this perf event, it executes __perf_remove_from_context-> list _del_event->perf_cgroup_event_disable on CPU1, which causes a race with perf_cgroup_switch running on CPU0. The following describes the details of this concurrency scenario: CPU0 CPU1 perf_cgroup_switch: ... # cpuctx->cgrp is not NULL here if (READ_ONCE(cpuctx->cgrp) == NULL) return; perf_remove_from_context: ... raw_spin_lock_irq(&ctx->lock); ... # ctx->is_active == 0 because reprogramm is not # tigger, so CPU1 can do __perf_remove_from_context # for CPU0 __perf_remove_from_context: perf_cgroup_event_disable: ... if (--ctx->nr_cgroups) ... # this warning will happened because CPU1 changed # ctx.nr_cgroups to 0. WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); [peterz: use guard instead of goto unlock] Fixes: db4a835601b7 ("perf/core: Set cgroup in CPU contexts for new cgroup events") Signed-off-by: Luo Gengkun Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250604033924.3914647-3-luogengkun@huaweicloud.com --- kernel/events/core.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d78608323916..d7cf008f3d75 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -207,6 +207,19 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, __perf_ctx_unlock(&cpuctx->ctx); } +typedef struct { + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; +} class_perf_ctx_lock_t; + +static inline void class_perf_ctx_lock_destructor(class_perf_ctx_lock_t *_T) +{ perf_ctx_unlock(_T->cpuctx, _T->ctx); } + +static inline class_perf_ctx_lock_t +class_perf_ctx_lock_constructor(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ perf_ctx_lock(cpuctx, ctx); return (class_perf_ctx_lock_t){ cpuctx, ctx }; } + #define TASK_TOMBSTONE ((void *)-1L) static bool is_kernel_event(struct perf_event *event) @@ -944,7 +957,13 @@ static void perf_cgroup_switch(struct task_struct *task) if (READ_ONCE(cpuctx->cgrp) == cgrp) return; - perf_ctx_lock(cpuctx, cpuctx->task_ctx); + guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx); + /* + * Re-check, could've raced vs perf_remove_from_context(). + */ + if (READ_ONCE(cpuctx->cgrp) == NULL) + return; + perf_ctx_disable(&cpuctx->ctx, true); ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); @@ -962,7 +981,6 @@ static void perf_cgroup_switch(struct task_struct *task) ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP); perf_ctx_enable(&cpuctx->ctx, true); - perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } static int perf_cgroup_ensure_storage(struct perf_event *event, From 49b393af3130c7712c7e8f215f4126c9a8060fa6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Jun 2025 10:21:38 +0200 Subject: [PATCH 008/497] perf: Add comment to enum perf_event_state Better describe the event states. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Leo Yan Link: https://lkml.kernel.org/r/20250604135801.GK38114@noisy.programming.kicks-ass.net --- include/linux/perf_event.h | 42 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 52dc7cfab0e0..ec9d96025683 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -635,8 +635,46 @@ struct perf_addr_filter_range { unsigned long size; }; -/** - * enum perf_event_state - the states of an event: +/* + * The normal states are: + * + * ACTIVE --. + * ^ | + * | | + * sched_{in,out}() | + * | | + * v | + * ,---> INACTIVE --+ <-. + * | | | + * | {dis,en}able() + * sched_in() | | + * | OFF <--' --+ + * | | + * `---> ERROR ------' + * + * That is: + * + * sched_in: INACTIVE -> {ACTIVE,ERROR} + * sched_out: ACTIVE -> INACTIVE + * disable: {ACTIVE,INACTIVE} -> OFF + * enable: {OFF,ERROR} -> INACTIVE + * + * Where {OFF,ERROR} are disabled states. + * + * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of + * defunct events: + * + * - EXIT means task that the even was assigned to died, but child events + * still live, and further children can still be created. But the event + * itself will never be active again. It can only transition to + * {REVOKED,DEAD}; + * + * - REVOKED means the PMU the event was associated with is gone; all + * functionality is stopped but the event is still alive. Can only + * transition to DEAD; + * + * - DEAD event really is DYING tearing down state and freeing bits. + * */ enum perf_event_state { PERF_EVENT_STATE_DEAD = -5, From 1a9dcf69c7a97e733aa2fc026db22f22928ca7b7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 28 May 2025 10:55:19 +0200 Subject: [PATCH 009/497] selftests/futex: getopt() requires int as return value. Mark reported that futex_priv_hash fails on ARM64. It turns out that the command line parsing does not terminate properly and ends in the default case assuming an invalid option was passed. Use an int as the return type for getopt(). Closes: https://lore.kernel.org/all/31869a69-063f-44a3-a079-ba71b2506cce@sirena.org.uk/ Fixes: 3163369407baf ("selftests/futex: Add futex_numa_mpol") Fixes: cda95faef7bcf ("selftests/futex: Add futex_priv_hash") Reported-by: Mark Brown Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250528085521.1938355-2-bigeasy@linutronix.de --- tools/testing/selftests/futex/functional/futex_numa_mpol.c | 2 +- tools/testing/selftests/futex/functional/futex_priv_hash.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index 20a9d3ecf743..564dbd02d2f4 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) struct futex32_numa *futex_numa; int mem_size, i; void *futex_ptr; - char c; + int c; while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 2dca18fefedc..24a92dc94eb8 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -130,7 +130,7 @@ int main(int argc, char *argv[]) pthread_mutexattr_t mutex_attr_pi; int use_global_hash = 0; int ret; - char c; + int c; while ((c = getopt(argc, argv, "cghv:")) != -1) { switch (c) { From 0ecb4232fc65e659ca7020f8bb2e0fc347acfb7d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 28 May 2025 10:55:20 +0200 Subject: [PATCH 010/497] selftests/futex: Set the home_node in futex_numa_mpol The test fails at the MPOL step if multiple nodes are available. The reason is that mbind() sets the policy but the home_node, which is retrieved by the futex code, is not set. This causes to retrieve the current node and with multiple nodes it fails on one of the iterations. Use numa_set_mempolicy_home_node() to set the expected node. Use ksft_exit_fail_msg() to fail and exit in order not to confuse ktap. Fixes: 3163369407baf ("selftests/futex: Add futex_numa_mpol") Suggested-by: Vlastimil Babka Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250528085521.1938355-3-bigeasy@linutronix.de --- .../testing/selftests/futex/functional/futex_numa_mpol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index 564dbd02d2f4..a9ecfb2d3932 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -210,6 +210,10 @@ int main(int argc, char *argv[]) ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8, 0); if (ret == 0) { + ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0); + if (ret != 0) + ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno); + ksft_print_msg("Node %d test\n", i); futex_numa->futex = 0; futex_numa->numa = FUTEX_NO_NODE; @@ -220,8 +224,8 @@ int main(int argc, char *argv[]) if (0) test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { - ksft_test_result_fail("Returned NUMA node is %d expected %d\n", - futex_numa->numa, i); + ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); } } } From 8337204c58899fe422db765b481711eb2d95eb0b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 28 May 2025 10:55:21 +0200 Subject: [PATCH 011/497] futex: Handle invalid node numbers supplied by user syzbot used a negative node number which was not rejected early and led to invalid memory access in node_possible(). Reject negative node numbers except for FUTEX_NO_NODE. [bigeasy: Keep the FUTEX_NO_NODE check] Closes: https://lore.kernel.org/all/6835bfe3.a70a0220.253bc2.00b5.GAE@google.com/ Fixes: cec199c5e39bd ("futex: Implement FUTEX2_NUMA") Signed-off-by: Peter Zijlstra (Intel) Reported-by: syzbot+9afaf6749e3a7aa1bdf3@syzkaller.appspotmail.com Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250528085521.1938355-4-bigeasy@linutronix.de --- kernel/futex/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 565f9717c6ca..b652d2f60c40 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -583,8 +583,8 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, if (futex_get_value(&node, naddr)) return -EFAULT; - if (node != FUTEX_NO_NODE && - (node >= MAX_NUMNODES || !node_possible(node))) + if ((node != FUTEX_NO_NODE) && + ((unsigned int)node >= MAX_NUMNODES || !node_possible(node))) return -EINVAL; } From 6678791ee3da0b78c28fe7d77814097f53cbb8df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:21 +0100 Subject: [PATCH 012/497] KVM: arm64: Add assignment-specific sysreg accessor Assigning a value to a system register doesn't do what it is supposed to be doing if that register is one that has RESx bits. The main problem is that we use __vcpu_sys_reg(), which can be used both as a lvalue and rvalue. When used as a lvalue, the bit masking occurs *before* the new value is assigned, meaning that we (1) do pointless work on the old cvalue, and (2) potentially assign an invalid value as we fail to apply the masks to it. Fix this by providing a new __vcpu_assign_sys_reg() that does what it says on the tin, and sanitises the *new* value instead of the old one. This comes with a significant amount of churn. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 11 ++++++ arch/arm64/kvm/arch_timer.c | 16 ++++----- arch/arm64/kvm/hyp/exception.c | 4 +-- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +-- arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 6 ++-- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 4 +-- arch/arm64/kvm/hyp/vhe/switch.c | 4 +-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 42 +++++++++++----------- arch/arm64/kvm/pmu-emul.c | 14 ++++---- arch/arm64/kvm/sys_regs.c | 38 ++++++++++---------- arch/arm64/kvm/sys_regs.h | 4 +-- arch/arm64/kvm/vgic/vgic-v3-nested.c | 10 +++--- 12 files changed, 85 insertions(+), 72 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d941abc6b5ee..3b84ad91116b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1107,6 +1107,17 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r) #define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r)) u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); + +#define __vcpu_assign_sys_reg(v, r, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + #define __vcpu_sys_reg(v,r) \ (*({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 5133dcbfe9f7..5a67a6d4d95b 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -108,16 +108,16 @@ static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CTL_EL0) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2) = ctl; + __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl); break; default: WARN_ON(1); @@ -130,16 +130,16 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval); break; case TIMER_PTIMER: - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = cval; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval); break; case TIMER_HVTIMER: - __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval); break; case TIMER_HPTIMER: - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = cval; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval); break; default: WARN_ON(1); diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 424a5107cddb..6a2a899a344e 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -37,7 +37,7 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) if (unlikely(vcpu_has_nv(vcpu))) vcpu_write_sys_reg(vcpu, val, reg); else if (!__vcpu_write_sys_reg_to_cpu(val, reg)) - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, @@ -51,7 +51,7 @@ static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long target_mode, } else if (has_vhe()) { write_sysreg_el1(val, SYS_SPSR); } else { - __vcpu_sys_reg(vcpu, SPSR_EL1) = val; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, val); } } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index eef310cdbdbd..aa5b561b9218 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -45,7 +45,7 @@ static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) if (!vcpu_el1_is_32bit(vcpu)) return; - __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); + __vcpu_assign_sys_reg(vcpu, FPEXC32_EL2, read_sysreg(fpexc32_el2)); } static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) @@ -457,7 +457,7 @@ static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) */ if (vcpu_has_sve(vcpu)) { zcr_el1 = read_sysreg_el1(SYS_ZCR); - __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1; + __vcpu_assign_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu), zcr_el1); /* * The guest's state is always saved using the guest's max VL. diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index b9cff893bbe0..4d0dbea4c56f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -307,11 +307,11 @@ static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); - __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); - __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + __vcpu_assign_sys_reg(vcpu, DACR32_EL2, read_sysreg(dacr32_el2)); + __vcpu_assign_sys_reg(vcpu, IFSR32_EL2, read_sysreg(ifsr32_el2)); if (has_vhe() || kvm_debug_regs_in_use(vcpu)) - __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); + __vcpu_assign_sys_reg(vcpu, DBGVCR32_EL2, read_sysreg(dbgvcr32_el2)); } static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8e8848de4d47..e9198e56e784 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -26,7 +26,7 @@ void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) { - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + __vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR)); /* * On saving/restoring guest sve state, always use the maximum VL for * the guest. The layout of the data when saving the sve state depends @@ -79,7 +79,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm)); if (has_fpmr) - __vcpu_sys_reg(vcpu, FPMR) = read_sysreg_s(SYS_FPMR); + __vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR)); if (system_supports_sve()) __hyp_sve_restore_host(); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index c9b330dc2066..09df2b42bc1b 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -223,9 +223,9 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ val = read_sysreg_el0(SYS_CNTP_CVAL); if (map.direct_ptimer == vcpu_ptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0) = val; + __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, val); if (map.direct_ptimer == vcpu_hptimer(vcpu)) - __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2) = val; + __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, val); offset = read_sysreg_s(SYS_CNTPOFF_EL2); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 3814b0b2c937..34c7bf7fe9de 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -18,17 +18,17 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) { /* These registers are common with EL1 */ - __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1); - __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1); + __vcpu_assign_sys_reg(vcpu, PAR_EL1, read_sysreg(par_el1)); + __vcpu_assign_sys_reg(vcpu, TPIDR_EL1, read_sysreg(tpidr_el1)); - __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR); - __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0); - __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1); - __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR); - __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR); - __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR); - __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR); - __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR); + __vcpu_assign_sys_reg(vcpu, ESR_EL2, read_sysreg_el1(SYS_ESR)); + __vcpu_assign_sys_reg(vcpu, AFSR0_EL2, read_sysreg_el1(SYS_AFSR0)); + __vcpu_assign_sys_reg(vcpu, AFSR1_EL2, read_sysreg_el1(SYS_AFSR1)); + __vcpu_assign_sys_reg(vcpu, FAR_EL2, read_sysreg_el1(SYS_FAR)); + __vcpu_assign_sys_reg(vcpu, MAIR_EL2, read_sysreg_el1(SYS_MAIR)); + __vcpu_assign_sys_reg(vcpu, VBAR_EL2, read_sysreg_el1(SYS_VBAR)); + __vcpu_assign_sys_reg(vcpu, CONTEXTIDR_EL2, read_sysreg_el1(SYS_CONTEXTIDR)); + __vcpu_assign_sys_reg(vcpu, AMAIR_EL2, read_sysreg_el1(SYS_AMAIR)); /* * In VHE mode those registers are compatible between EL1 and EL2, @@ -46,21 +46,21 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) * are always trapped, ensuring that the in-memory * copy is always up-to-date. A small blessing... */ - __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR); - __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0); - __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1); - __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR); + __vcpu_assign_sys_reg(vcpu, SCTLR_EL2, read_sysreg_el1(SYS_SCTLR)); + __vcpu_assign_sys_reg(vcpu, TTBR0_EL2, read_sysreg_el1(SYS_TTBR0)); + __vcpu_assign_sys_reg(vcpu, TTBR1_EL2, read_sysreg_el1(SYS_TTBR1)); + __vcpu_assign_sys_reg(vcpu, TCR_EL2, read_sysreg_el1(SYS_TCR)); if (ctxt_has_tcrx(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2); + __vcpu_assign_sys_reg(vcpu, TCR2_EL2, read_sysreg_el1(SYS_TCR2)); if (ctxt_has_s1pie(&vcpu->arch.ctxt)) { - __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0); - __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR); + __vcpu_assign_sys_reg(vcpu, PIRE0_EL2, read_sysreg_el1(SYS_PIRE0)); + __vcpu_assign_sys_reg(vcpu, PIR_EL2, read_sysreg_el1(SYS_PIR)); } if (ctxt_has_s1poe(&vcpu->arch.ctxt)) - __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR); + __vcpu_assign_sys_reg(vcpu, POR_EL2, read_sysreg_el1(SYS_POR)); } /* @@ -74,9 +74,9 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; } - __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1); - __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR); - __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR); + __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); + __vcpu_assign_sys_reg(vcpu, ELR_EL2, read_sysreg_el1(SYS_ELR)); + __vcpu_assign_sys_reg(vcpu, SPSR_EL2, read_sysreg_el1(SYS_SPSR)); } static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 25c29107f13f..4f0ae8073f78 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -178,7 +178,7 @@ static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) val |= lower_32_bits(val); } - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); /* Recreate the perf event to reflect the updated sample_period */ kvm_pmu_create_perf_event(pmc); @@ -204,7 +204,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) void kvm_pmu_set_counter_value_user(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); - __vcpu_sys_reg(vcpu, counter_index_to_reg(select_idx)) = val; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(select_idx), val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); } @@ -239,7 +239,7 @@ static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) reg = counter_index_to_reg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); kvm_pmu_release_perf_event(pmc); } @@ -503,7 +503,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; if (!kvm_pmc_is_64bit(pmc)) reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; + __vcpu_assign_sys_reg(vcpu, counter_index_to_reg(i), reg); /* No overflow? move on */ if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) @@ -602,7 +602,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); /* The reset bits don't indicate any state, and shouldn't be saved. */ - __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); + __vcpu_assign_sys_reg(vcpu, PMCR_EL0, (val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P))); if (val & ARMV8_PMU_PMCR_C) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); @@ -779,7 +779,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 reg; reg = counter_index_to_evtreg(pmc->idx); - __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_assign_sys_reg(vcpu, reg, (data & kvm_pmu_evtyper_mask(vcpu->kvm))); kvm_pmu_create_perf_event(pmc); } @@ -1038,7 +1038,7 @@ static void kvm_arm_set_nr_counters(struct kvm *kvm, unsigned int nr) u64 val = __vcpu_sys_reg(vcpu, MDCR_EL2); val &= ~MDCR_EL2_HPMN; val |= FIELD_PREP(MDCR_EL2_HPMN, kvm->arch.nr_pmu_counters); - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); } } } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 707c651aff03..93d0ca7ed936 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -228,7 +228,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) * to reverse-translate virtual EL2 system registers for a * non-VHE guest hypervisor. */ - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); switch (reg) { case CNTHCTL_EL2: @@ -263,7 +263,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) return; memory_write: - __vcpu_sys_reg(vcpu, reg) = val; + __vcpu_assign_sys_reg(vcpu, reg, val); } /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ @@ -605,7 +605,7 @@ static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val ^ rd->val) & ~OSLSR_EL1_OSLK) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -835,7 +835,7 @@ static u64 reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * The value of PMCR.N field is included when the * vCPU register is read via kvm_vcpu_read_pmcr(). */ - __vcpu_sys_reg(vcpu, r->reg) = pmcr; + __vcpu_assign_sys_reg(vcpu, r->reg, pmcr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -907,7 +907,7 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) - __vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval; + __vcpu_assign_sys_reg(vcpu, PMSELR_EL0, p->regval); else /* return PMSELR.SEL field */ p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0) @@ -1076,7 +1076,7 @@ static int set_pmreg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, u64 va { u64 mask = kvm_pmu_accessible_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, r->reg) = val & mask; + __vcpu_assign_sys_reg(vcpu, r->reg, val & mask); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -1185,8 +1185,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!vcpu_mode_priv(vcpu)) return undef_access(vcpu, p, r); - __vcpu_sys_reg(vcpu, PMUSERENR_EL0) = - p->regval & ARMV8_PMU_USERENR_MASK; + __vcpu_assign_sys_reg(vcpu, PMUSERENR_EL0, + (p->regval & ARMV8_PMU_USERENR_MASK)); } else { p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0) & ARMV8_PMU_USERENR_MASK; @@ -1237,7 +1237,7 @@ static int set_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); return 0; @@ -2207,7 +2207,7 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (kvm_has_mte(vcpu->kvm)) clidr |= 2ULL << CLIDR_TTYPE_SHIFT(loc); - __vcpu_sys_reg(vcpu, r->reg) = clidr; + __vcpu_assign_sys_reg(vcpu, r->reg, clidr); return __vcpu_sys_reg(vcpu, r->reg); } @@ -2221,7 +2221,7 @@ static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc)) return -EINVAL; - __vcpu_sys_reg(vcpu, rd->reg) = val; + __vcpu_assign_sys_reg(vcpu, rd->reg, val); return 0; } @@ -2398,7 +2398,7 @@ static bool access_sp_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SP_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SP_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SP_EL1); @@ -2422,7 +2422,7 @@ static bool access_spsr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, SPSR_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, SPSR_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, SPSR_EL1); @@ -2434,7 +2434,7 @@ static bool access_cntkctl_el12(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (p->is_write) - __vcpu_sys_reg(vcpu, CNTKCTL_EL1) = p->regval; + __vcpu_assign_sys_reg(vcpu, CNTKCTL_EL1, p->regval); else p->regval = __vcpu_sys_reg(vcpu, CNTKCTL_EL1); @@ -2448,7 +2448,9 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1)) val |= HCR_E2H; - return __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); + + return __vcpu_sys_reg(vcpu, r->reg); } static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu, @@ -2619,7 +2621,7 @@ static bool access_mdcr(struct kvm_vcpu *vcpu, u64_replace_bits(val, hpmn, MDCR_EL2_HPMN); } - __vcpu_sys_reg(vcpu, MDCR_EL2) = val; + __vcpu_assign_sys_reg(vcpu, MDCR_EL2, val); /* * Request a reload of the PMU to enable/disable the counters @@ -2748,7 +2750,7 @@ static int set_imp_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r, static u64 reset_mdcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - __vcpu_sys_reg(vcpu, r->reg) = vcpu->kvm->arch.nr_pmu_counters; + __vcpu_assign_sys_reg(vcpu, r->reg, vcpu->kvm->arch.nr_pmu_counters); return vcpu->kvm->arch.nr_pmu_counters; } @@ -5006,7 +5008,7 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, if (r->set_user) { ret = (r->set_user)(vcpu, r, val); } else { - __vcpu_sys_reg(vcpu, r->reg) = val; + __vcpu_assign_sys_reg(vcpu, r->reg, val); ret = 0; } diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index cc6338d38766..ef97d9fc67cc 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -137,7 +137,7 @@ static inline u64 reset_unknown(struct kvm_vcpu *vcpu, { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL; + __vcpu_assign_sys_reg(vcpu, r->reg, 0x1de7ec7edbadc0deULL); return __vcpu_sys_reg(vcpu, r->reg); } @@ -145,7 +145,7 @@ static inline u64 reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { BUG_ON(!r->reg); BUG_ON(r->reg >= NR_SYS_REGS); - __vcpu_sys_reg(vcpu, r->reg) = r->val; + __vcpu_assign_sys_reg(vcpu, r->reg, r->val); return __vcpu_sys_reg(vcpu, r->reg); } diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index 4f6954c30674..d22a8ad7bcc5 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -356,12 +356,12 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); val &= ~ICH_HCR_EL2_EOIcount_MASK; val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); - __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val; - __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr; + __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val); + __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr); for (i = 0; i < 4; i++) { - __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i]; - __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i]; + __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]); + __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]); } for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { @@ -370,7 +370,7 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val &= ~ICH_LR_STATE; val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE; - __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val; + __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); s_cpu_if->vgic_lr[i] = 0; } From 8800b7c4bbede3cd40831726d3f98e8080baf4df Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:22 +0100 Subject: [PATCH 013/497] KVM: arm64: Add RMW specific sysreg accessor In a number of cases, we perform a Read-Modify-Write operation on a system register, meaning that we would apply the RESx masks twice. Instead, provide a new accessor that performs this RMW operation, allowing the masks to be applied exactly once per operation. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 11 +++++++++++ arch/arm64/kvm/debug.c | 4 ++-- arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 4 ++-- arch/arm64/kvm/nested.c | 2 +- arch/arm64/kvm/pmu-emul.c | 10 +++++----- arch/arm64/kvm/sys_regs.c | 22 +++++++++++----------- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 3b84ad91116b..8b8c649f225b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1118,6 +1118,17 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); ctxt_sys_reg(ctxt, (r)) = __v; \ } while (0) +#define __vcpu_rmw_sys_reg(v, r, op, val) \ + do { \ + const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ + __v op (val); \ + if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + \ + ctxt_sys_reg(ctxt, (r)) = __v; \ + } while (0) + #define __vcpu_sys_reg(v,r) \ (*({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 0e4c805e7e89..1a7dab333f55 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -216,9 +216,9 @@ void kvm_debug_set_guest_ownership(struct kvm_vcpu *vcpu) void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val) { if (val & OSLAR_EL1_OSLK) - __vcpu_sys_reg(vcpu, OSLSR_EL1) |= OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, |=, OSLSR_EL1_OSLK); else - __vcpu_sys_reg(vcpu, OSLSR_EL1) &= ~OSLSR_EL1_OSLK; + __vcpu_rmw_sys_reg(vcpu, OSLSR_EL1, &=, ~OSLSR_EL1_OSLK); preempt_disable(); kvm_arch_vcpu_put(vcpu); diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c index 34c7bf7fe9de..73e4bc7fde9e 100644 --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c @@ -70,8 +70,8 @@ static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu) */ val = read_sysreg_el1(SYS_CNTKCTL); val &= CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS; - __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val; + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, &=, ~CNTKCTL_VALID_BITS); + __vcpu_rmw_sys_reg(vcpu, CNTHCTL_EL2, |=, val); } __vcpu_assign_sys_reg(vcpu, SP_EL2, read_sysreg(sp_el1)); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 4a53e4147fb0..5b191f4dc566 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1757,7 +1757,7 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu) out: for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++) - (void)__vcpu_sys_reg(vcpu, sr); + __vcpu_rmw_sys_reg(vcpu, sr, |=, 0); return 0; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 4f0ae8073f78..b03dbda7f1ab 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -510,7 +510,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, continue; /* Mark overflow */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(i)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(i + 1), @@ -556,7 +556,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, perf_event->attr.sample_period = period; perf_event->hw.sample_period = period; - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, BIT(idx)); if (kvm_pmu_counter_can_chain(pmc)) kvm_pmu_counter_increment(vcpu, BIT(idx + 1), @@ -914,9 +914,9 @@ void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask; - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask; - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask; + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, mask); + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, mask); kvm_pmu_reprogram_counter_mask(vcpu, mask); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 93d0ca7ed936..e89d345e42d0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -791,7 +791,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) mask |= GENMASK(n - 1, 0); reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= mask; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, mask); return __vcpu_sys_reg(vcpu, r->reg); } @@ -799,7 +799,7 @@ static u64 reset_pmu_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmevcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= GENMASK(31, 0); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, GENMASK(31, 0)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -811,7 +811,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) return 0; reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= kvm_pmu_evtyper_mask(vcpu->kvm); + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, kvm_pmu_evtyper_mask(vcpu->kvm)); return __vcpu_sys_reg(vcpu, r->reg); } @@ -819,7 +819,7 @@ static u64 reset_pmevtyper(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static u64 reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { reset_unknown(vcpu, r); - __vcpu_sys_reg(vcpu, r->reg) &= PMSELR_EL0_SEL_MASK; + __vcpu_rmw_sys_reg(vcpu, r->reg, &=, PMSELR_EL0_SEL_MASK); return __vcpu_sys_reg(vcpu, r->reg); } @@ -1103,10 +1103,10 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, val = p->regval & mask; if (r->Op2 & 0x1) /* accessing PMCNTENSET_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, |=, val); else /* accessing PMCNTENCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMCNTENSET_EL0, &=, ~val); kvm_pmu_reprogram_counter_mask(vcpu, val); } else { @@ -1129,10 +1129,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) /* accessing PMINTENSET_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, |=, val); else /* accessing PMINTENCLR_EL1 */ - __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val; + __vcpu_rmw_sys_reg(vcpu, PMINTENSET_EL1, &=, ~val); } else { p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } @@ -1151,10 +1151,10 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (p->is_write) { if (r->CRm & 0x2) /* accessing PMOVSSET_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, |=, (p->regval & mask)); else /* accessing PMOVSCLR_EL0 */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask); + __vcpu_rmw_sys_reg(vcpu, PMOVSSET_EL0, &=, ~(p->regval & mask)); } else { p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); } @@ -4786,7 +4786,7 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) r->reset(vcpu, r); if (r->reg >= __SANITISED_REG_START__ && r->reg < NR_SYS_REGS) - (void)__vcpu_sys_reg(vcpu, r->reg); + __vcpu_rmw_sys_reg(vcpu, r->reg, |=, 0); } set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags); From b61fae4ee120f337b8700dff43b2fd639f3bf6a5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:23 +0100 Subject: [PATCH 014/497] KVM: arm64: Don't use __vcpu_sys_reg() to get the address of a sysreg We are about to prevent the use of __vcpu_sys_reg() as a lvalue, and getting the address of a rvalue is not a thing. Update the couple of places where we do this to use the __ctxt_sys_reg() accessor, which return the address of a register. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 2 +- arch/arm64/kvm/fpsimd.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 5a67a6d4d95b..c6b4fb4c8e08 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1036,7 +1036,7 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) if (vcpu_has_nv(vcpu)) { struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset; - offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); + offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2); offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 7f6e43d25691..8f6c8f57c6b9 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -103,8 +103,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) fp_state.sve_state = vcpu->arch.sve_state; fp_state.sve_vl = vcpu->arch.sve_max_vl; fp_state.sme_state = NULL; - fp_state.svcr = &__vcpu_sys_reg(vcpu, SVCR); - fp_state.fpmr = &__vcpu_sys_reg(vcpu, FPMR); + fp_state.svcr = __ctxt_sys_reg(&vcpu->arch.ctxt, SVCR); + fp_state.fpmr = __ctxt_sys_reg(&vcpu->arch.ctxt, FPMR); fp_state.fp_type = &vcpu->arch.fp_type; if (vcpu_has_sve(vcpu)) From b5fa1f91e11fdf74ad4e2ac6dae246a57cbd2d95 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 3 Jun 2025 08:08:24 +0100 Subject: [PATCH 015/497] KVM: arm64: Make __vcpu_sys_reg() a pure rvalue operand Now that we don't have any use of __vcpu_sys_reg() as a lvalue, remove the in-place update, and directly return the sanitised value. Reviewed-by: Miguel Luis Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250603070824.1192795-5-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8b8c649f225b..382b382d14da 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1130,13 +1130,13 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64); } while (0) #define __vcpu_sys_reg(v,r) \ - (*({ \ + ({ \ const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \ - u64 *__r = __ctxt_sys_reg(ctxt, (r)); \ + u64 __v = ctxt_sys_reg(ctxt, (r)); \ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \ - *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\ - __r; \ - })) + __v = kvm_vcpu_apply_reg_masks((v), (r), __v); \ + __v; \ + }) u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); From 9a9864fd09c7e52440c05e70609bf5ee8da425d0 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:10 +0200 Subject: [PATCH 016/497] KVM: arm64: selftests: Fix help text for arch_timer_edge_cases Fix the help text for arch_timer_edge_cases to show the correct option for setting the wait time. Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-2-sebott@redhat.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a36a7e2db434..c4716e0c1438 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -986,7 +986,7 @@ static void test_print_help(char *name) pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", LONG_WAIT_TEST_MS); - pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", WAIT_TEST_MS); pr_info("\t-p: Test physical timer (default: true)\n"); pr_info("\t-v: Test virtual timer (default: true)\n"); From 050632ae6571d0f148fa0d4b90b6409a12645461 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:11 +0200 Subject: [PATCH 017/497] KVM: arm64: selftests: Fix thread migration in arch_timer_edge_cases arch_timer_edge_cases tries to migrate itself across host cpus. Before the first test, it migrates to cpu 0 by setting up an affinity mask with only bit 0 set. After that it looks for the next possible cpu in the current affinity mask which still has only bit 0 set. So there is no migration at all. Fix this by reading the default mask at start and use this to find the next cpu in each iteration. Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-3-sebott@redhat.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index c4716e0c1438..a813b4c6c817 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -849,17 +849,17 @@ static void guest_code(enum arch_timer timer) GUEST_DONE(); } +static cpu_set_t default_cpuset; + static uint32_t next_pcpu(void) { uint32_t max = get_nprocs(); uint32_t cur = sched_getcpu(); uint32_t next = cur; - cpu_set_t cpuset; + cpu_set_t cpuset = default_cpuset; TEST_ASSERT(max > 1, "Need at least two physical cpus"); - sched_getaffinity(0, sizeof(cpuset), &cpuset); - do { next = (next + 1) % CPU_SETSIZE; } while (!CPU_ISSET(next, &cpuset)); @@ -1046,6 +1046,8 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv)) exit(KSFT_SKIP); + sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); From 05ce38d489dbc96eb1dd700b287f949cb8cc0610 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:12 +0200 Subject: [PATCH 018/497] KVM: arm64: selftests: Fix xVAL init in arch_timer_edge_cases arch_timer_edge_cases hits the following assertion in < 10% of the test runs: ==== Test Assertion Failure ==== arm64/arch_timer_edge_cases.c:490: timer_get_cntct(timer) >= DEF_CNT + (timer_get_cntfrq() * (uint64_t)(delta_2_ms) / 1000) pid=17110 tid=17110 errno=4 - Interrupted system call 1 0x0000000000404ec7: test_run at arch_timer_edge_cases.c:945 2 0x0000000000401fa3: main at arch_timer_edge_cases.c:1074 3 0x0000ffffa774b587: ?? ??:0 4 0x0000ffffa774b65f: ?? ??:0 5 0x000000000040206f: _start at ??:? timer_get_cntct(timer) >= DEF_CNT + msec_to_cycles(delta_2_ms) Enabling the timer without proper xval initialization in set_tval_irq() resulted in an early interrupt during timer reprogramming. Make sure to set the xval before setting the enable bit. Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-4-sebott@redhat.com Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a813b4c6c817..be8bbedc933b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -191,8 +191,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); - timer_set_ctl(timer, ctl); timer_set_tval(timer, tval_cycles); + timer_set_ctl(timer, ctl); } static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, From fad4cf944839da7f5c3376243aa353295c88f588 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 5 Jun 2025 12:36:13 +0200 Subject: [PATCH 019/497] KVM: arm64: selftests: Determine effective counter width in arch_timer_edge_cases arch_timer_edge_cases uses ~0 as the maximum counter value, however there's no architectural guarantee that this is valid. Figure out the effective counter width based on the effective frequency like it's done by the kernel. This also serves as a workaround for AC03_CPU_14 that led to the following assertion failure on ampere-one machines: ==== Test Assertion Failure ==== arm64/arch_timer_edge_cases.c:169: timer_condition == istatus pid=11236 tid=11236 errno=4 - Interrupted system call 1 0x0000000000404ce7: test_run at arch_timer_edge_cases.c:938 2 0x0000000000401ebb: main at arch_timer_edge_cases.c:1053 3 0x0000ffff9fa8625b: ?? ??:0 4 0x0000ffff9fa8633b: ?? ??:0 5 0x0000000000401fef: _start at ??:? 0x1 != 0x0 (timer_condition != istatus) Note that the following subtest only worked since the counter initialized with CVAL_MAX would instantly overflow (which is no longer the case): test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); To fix this we could swap CVAL_MAX for 0 here but since that is already done by test_move_counters_behind_timers() let's remove that subtest. Link: https://lore.kernel.org/kvmarm/ac1de1d2-ef2b-d439-dc48-8615e121b07b@redhat.com Link: https://amperecomputing.com/assets/AmpereOne_Developer_ER_v0_80_20240823_28945022f4.pdf Signed-off-by: Sebastian Ott Link: https://lore.kernel.org/r/20250605103613.14544-5-sebott@redhat.com Signed-off-by: Marc Zyngier --- .../kvm/arm64/arch_timer_edge_cases.c | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index be8bbedc933b..b4d22b3ab7cc 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -22,7 +22,8 @@ #include "gic.h" #include "vgic.h" -static const uint64_t CVAL_MAX = ~0ULL; +/* Depends on counter width. */ +static uint64_t CVAL_MAX; /* tval is a signed 32-bit int. */ static const int32_t TVAL_MAX = INT32_MAX; static const int32_t TVAL_MIN = INT32_MIN; @@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN; /* After how much time we say there is no IRQ. */ static const uint32_t TIMEOUT_NO_IRQ_US = 50000; -/* A nice counter value to use as the starting one for most tests. */ -static const uint64_t DEF_CNT = (CVAL_MAX / 2); +/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ +static uint64_t DEF_CNT; /* Number of runs. */ static const uint32_t NR_TEST_ITERS_DEF = 5; @@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer) test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, wm); } - - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); - } } /* @@ -975,6 +970,8 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, test_init_timer_irq(*vm, *vcpu); vgic_v3_setup(*vm, 1, 64); sync_global_to_guest(*vm, test_args); + sync_global_to_guest(*vm, CVAL_MAX); + sync_global_to_guest(*vm, DEF_CNT); } static void test_print_help(char *name) @@ -1035,6 +1032,17 @@ static bool parse_args(int argc, char *argv[]) return false; } +static void set_counter_defaults(void) +{ + const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + uint64_t freq = read_sysreg(CNTFRQ_EL0); + uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq); + + width = clamp(width, 56, 64); + CVAL_MAX = GENMASK_ULL(width - 1, 0); + DEF_CNT = CVAL_MAX / 2; +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -1047,6 +1055,7 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + set_counter_defaults(); if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); From 308a3a8ce8ea41b26c46169f3263e50f5997c28e Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sat, 31 May 2025 18:24:58 +0300 Subject: [PATCH 020/497] Bluetooth: hci_core: fix list_for_each_entry_rcu usage Releasing + re-acquiring RCU lock inside list_for_each_entry_rcu() loop body is not correct. Fix by taking the update-side hdev->lock instead. Fixes: c7eaf80bfb0c ("Bluetooth: Fix hci_link_tx_to RCU lock usage") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_core.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3b49828160b7..04845ff3ad57 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3417,23 +3417,18 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) bt_dev_err(hdev, "link tx timeout"); - rcu_read_lock(); + hci_dev_lock(hdev); /* Kill stalled connections */ - list_for_each_entry_rcu(c, &h->list, list) { + list_for_each_entry(c, &h->list, list) { if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); - /* hci_disconnect might sleep, so, we have to release - * the RCU read lock before calling it. - */ - rcu_read_unlock(); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); - rcu_read_lock(); } } - rcu_read_unlock(); + hci_dev_unlock(hdev); } static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, From daabd276985055250528da97e9ce6d277d7009c2 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Tue, 3 Jun 2025 15:34:38 +0530 Subject: [PATCH 021/497] Bluetooth: btintel_pcie: Fix driver not posting maximum rx buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver was posting only 6 rx buffers, despite the maximum rx buffers being defined as 16. Having fewer RX buffers caused firmware exceptions in HID use cases when events arrived in bursts. Exception seen on android 6.12 kernel. E Bluetooth: hci0: Received hw exception interrupt E Bluetooth: hci0: Received gp1 mailbox interrupt D Bluetooth: hci0: 00000000: ff 3e 87 80 03 01 01 01 03 01 0c 0d 02 1c 10 0e D Bluetooth: hci0: 00000010: 01 00 05 14 66 b0 28 b0 c0 b0 28 b0 ac af 28 b0 D Bluetooth: hci0: 00000020: 14 f1 28 b0 00 00 00 00 fa 04 00 00 00 00 40 10 D Bluetooth: hci0: 00000030: 08 00 00 00 7a 7a 7a 7a 47 00 fb a0 10 00 00 00 D Bluetooth: hci0: 00000000: 10 01 0a E Bluetooth: hci0: ---- Dump of debug registers — E Bluetooth: hci0: boot stage: 0xe0fb0047 E Bluetooth: hci0: ipc status: 0x00000004 E Bluetooth: hci0: ipc control: 0x00000000 E Bluetooth: hci0: ipc sleep control: 0x00000000 E Bluetooth: hci0: mbox_1: 0x00badbad E Bluetooth: hci0: mbox_2: 0x0000101c E Bluetooth: hci0: mbox_3: 0x00000008 E Bluetooth: hci0: mbox_4: 0x7a7a7a7a Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 3 ++- drivers/bluetooth/btintel_pcie.h | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 50fe17f1e1d1..2c7731803c9f 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -396,8 +396,9 @@ static int btintel_pcie_submit_rx(struct btintel_pcie_data *data) static int btintel_pcie_start_rx(struct btintel_pcie_data *data) { int i, ret; + struct rxq *rxq = &data->rxq; - for (i = 0; i < BTINTEL_PCIE_RX_MAX_QUEUE; i++) { + for (i = 0; i < rxq->count; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 21b964b15c1c..5ddd6d7d8d45 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -177,9 +177,6 @@ enum { /* Doorbell vector for TFD */ #define BTINTEL_PCIE_TX_DB_VEC 0 -/* Number of pending RX requests for downlink */ -#define BTINTEL_PCIE_RX_MAX_QUEUE 6 - /* Doorbell vector for FRBD */ #define BTINTEL_PCIE_RX_DB_VEC 513 From 2dd711102ce69ae41f65d09c012441227d4aa983 Mon Sep 17 00:00:00 2001 From: Chandrashekar Devegowda Date: Tue, 3 Jun 2025 15:34:39 +0530 Subject: [PATCH 022/497] Bluetooth: btintel_pcie: Increase the tx and rx descriptor count This change addresses latency issues observed in HID use cases where events arrive in bursts. By increasing the Rx descriptor count to 64, the firmware can handle bursty data more effectively, reducing latency and preventing buffer overflows. Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 24 ++++++++++++------------ drivers/bluetooth/btintel_pcie.h | 7 +++++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 2c7731803c9f..03f13de4a723 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1783,8 +1783,8 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) * + size of index * Number of queues(2) * type of index array(4) * + size of context information */ - total = (sizeof(struct tfd) + sizeof(struct urbd0) + sizeof(struct frbd) - + sizeof(struct urbd1)) * BTINTEL_DESCS_COUNT; + total = (sizeof(struct tfd) + sizeof(struct urbd0)) * BTINTEL_PCIE_TX_DESCS_COUNT; + total += (sizeof(struct frbd) + sizeof(struct urbd1)) * BTINTEL_PCIE_RX_DESCS_COUNT; /* Add the sum of size of index array and size of ci struct */ total += (sizeof(u16) * BTINTEL_PCIE_NUM_QUEUES * 4) + sizeof(struct ctx_info); @@ -1809,36 +1809,36 @@ static int btintel_pcie_alloc(struct btintel_pcie_data *data) data->dma_v_addr = v_addr; /* Setup descriptor count */ - data->txq.count = BTINTEL_DESCS_COUNT; - data->rxq.count = BTINTEL_DESCS_COUNT; + data->txq.count = BTINTEL_PCIE_TX_DESCS_COUNT; + data->rxq.count = BTINTEL_PCIE_RX_DESCS_COUNT; /* Setup tfds */ data->txq.tfds_p_addr = p_addr; data->txq.tfds = v_addr; - p_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct tfd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct tfd) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup urbd0 */ data->txq.urbd0s_p_addr = p_addr; data->txq.urbd0s = v_addr; - p_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd0) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); + v_addr += (sizeof(struct urbd0) * BTINTEL_PCIE_TX_DESCS_COUNT); /* Setup FRBD*/ data->rxq.frbds_p_addr = p_addr; data->rxq.frbds = v_addr; - p_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct frbd) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct frbd) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup urbd1 */ data->rxq.urbd1s_p_addr = p_addr; data->rxq.urbd1s = v_addr; - p_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); - v_addr += (sizeof(struct urbd1) * BTINTEL_DESCS_COUNT); + p_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); + v_addr += (sizeof(struct urbd1) * BTINTEL_PCIE_RX_DESCS_COUNT); /* Setup data buffers for txq */ err = btintel_pcie_setup_txq_bufs(data, &data->txq); diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h index 5ddd6d7d8d45..7dad4523236c 100644 --- a/drivers/bluetooth/btintel_pcie.h +++ b/drivers/bluetooth/btintel_pcie.h @@ -154,8 +154,11 @@ enum msix_mbox_int_causes { /* Default interrupt timeout in msec */ #define BTINTEL_DEFAULT_INTR_TIMEOUT_MS 3000 -/* The number of descriptors in TX/RX queues */ -#define BTINTEL_DESCS_COUNT 16 +/* The number of descriptors in TX queues */ +#define BTINTEL_PCIE_TX_DESCS_COUNT 32 + +/* The number of descriptors in RX queues */ +#define BTINTEL_PCIE_RX_DESCS_COUNT 64 /* Number of Queue for TX and RX * It indicates the index of the IA(Index Array) From bf2ffc4d14db29cab781549912d2dc69127f4d3e Mon Sep 17 00:00:00 2001 From: Chandrashekar Devegowda Date: Tue, 3 Jun 2025 15:34:40 +0530 Subject: [PATCH 023/497] Bluetooth: btintel_pcie: Reduce driver buffer posting to prevent race condition Modify the driver to post 3 fewer buffers than the maximum rx buffers (64) allowed for the firmware. This change mitigates a hardware issue causing a race condition in the firmware, improving stability and data handling. Signed-off-by: Chandrashekar Devegowda Signed-off-by: Kiran K Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 03f13de4a723..563165c5efae 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -398,7 +398,11 @@ static int btintel_pcie_start_rx(struct btintel_pcie_data *data) int i, ret; struct rxq *rxq = &data->rxq; - for (i = 0; i < rxq->count; i++) { + /* Post (BTINTEL_PCIE_RX_DESCS_COUNT - 3) buffers to overcome the + * hardware issues leading to race condition at the firmware. + */ + + for (i = 0; i < rxq->count - 3; i++) { ret = btintel_pcie_submit_rx(data); if (ret) return ret; From e6ed54e86aae9e4f7286ce8d5c73780f91b48d1c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 3 Jun 2025 16:12:39 -0400 Subject: [PATCH 024/497] Bluetooth: MGMT: Fix UAF on mgmt_remove_adv_monitor_complete This reworks MGMT_OP_REMOVE_ADV_MONITOR to not use mgmt_pending_add to avoid crashes like bellow: ================================================================== BUG: KASAN: slab-use-after-free in mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 Read of size 8 at addr ffff88801c53f318 by task kworker/u5:5/5341 CPU: 0 UID: 0 PID: 5341 Comm: kworker/u5:5 Not tainted 6.15.0-syzkaller-10402-g4cb6c8af8591 #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xd2/0x2b0 mm/kasan/report.c:521 kasan_report+0x118/0x150 mm/kasan/report.c:634 mgmt_remove_adv_monitor_complete+0xe5/0x540 net/bluetooth/mgmt.c:5406 hci_cmd_sync_work+0x261/0x3a0 net/bluetooth/hci_sync.c:334 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xade/0x17b0 kernel/workqueue.c:3321 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3402 kthread+0x711/0x8a0 kernel/kthread.c:464 ret_from_fork+0x3fc/0x770 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Allocated by task 5987: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x230/0x3d0 mm/slub.c:4358 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline] mgmt_pending_new+0x65/0x240 net/bluetooth/mgmt_util.c:252 mgmt_pending_add+0x34/0x120 net/bluetooth/mgmt_util.c:279 remove_adv_monitor+0x103/0x1b0 net/bluetooth/mgmt.c:5454 hci_mgmt_cmd+0x9c9/0xef0 net/bluetooth/hci_sock.c:1719 hci_sock_sendmsg+0x6ca/0xef0 net/bluetooth/hci_sock.c:1839 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:727 sock_write_iter+0x258/0x330 net/socket.c:1131 new_sync_write fs/read_write.c:593 [inline] vfs_write+0x548/0xa90 fs/read_write.c:686 ksys_write+0x145/0x250 fs/read_write.c:738 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 5989: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x62/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x18e/0x440 mm/slub.c:4841 mgmt_pending_foreach+0xc9/0x120 net/bluetooth/mgmt_util.c:242 mgmt_index_removed+0x10d/0x2f0 net/bluetooth/mgmt.c:9366 hci_sock_bind+0xbe9/0x1000 net/bluetooth/hci_sock.c:1314 __sys_bind_socket net/socket.c:1810 [inline] __sys_bind+0x2c3/0x3e0 net/socket.c:1841 __do_sys_bind net/socket.c:1846 [inline] __se_sys_bind net/socket.c:1844 [inline] __x64_sys_bind+0x7a/0x90 net/socket.c:1844 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 66bd095ab5d4 ("Bluetooth: advmon offload MSFT remove monitor") Closes: https://syzkaller.appspot.com/bug?extid=feb0dc579bbe30a13190 Reported-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Tested-by: syzbot+feb0dc579bbe30a13190@syzkaller.appspotmail.com Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_core.c | 4 +--- net/bluetooth/mgmt.c | 37 ++++++++++---------------------- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2b261e74e2c4..93fcb659f0d4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2400,7 +2400,6 @@ void mgmt_advertising_added(struct sock *sk, struct hci_dev *hdev, u8 instance); void mgmt_advertising_removed(struct sock *sk, struct hci_dev *hdev, u8 instance); -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle); int mgmt_phy_configuration_changed(struct hci_dev *hdev, struct sock *skip); void mgmt_adv_monitor_device_lost(struct hci_dev *hdev, u16 handle, bdaddr_t *bdaddr, u8 addr_type); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 04845ff3ad57..aeda2e4557d5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1877,10 +1877,8 @@ void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor) if (monitor->handle) idr_remove(&hdev->adv_monitors_idr, monitor->handle); - if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) { + if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED) hdev->adv_monitors_cnt--; - mgmt_adv_monitor_removed(hdev, monitor->handle); - } kfree(monitor); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 14a9462fced5..feaeec2423ae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5108,24 +5108,14 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_ADV_MONITOR_ADDED, hdev, &ev, sizeof(ev), sk); } -void mgmt_adv_monitor_removed(struct hci_dev *hdev, u16 handle) +static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, + u16 handle) { struct mgmt_ev_adv_monitor_removed ev; - struct mgmt_pending_cmd *cmd; - struct sock *sk_skip = NULL; - struct mgmt_cp_remove_adv_monitor *cp; - - cmd = pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev); - if (cmd) { - cp = cmd->param; - - if (cp->monitor_handle) - sk_skip = cmd->sk; - } ev.monitor_handle = cpu_to_le16(handle); - mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk_skip); + mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } static int read_adv_mon_features(struct sock *sk, struct hci_dev *hdev, @@ -5227,8 +5217,7 @@ static int __add_adv_patterns_monitor(struct sock *sk, struct hci_dev *hdev, if (pending_find(MGMT_OP_SET_LE, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || - pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) { + pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } @@ -5398,8 +5387,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, struct mgmt_pending_cmd *cmd = data; struct mgmt_cp_remove_adv_monitor *cp; - if (status == -ECANCELED || - cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) + if (status == -ECANCELED) return; hci_dev_lock(hdev); @@ -5408,12 +5396,14 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, rp.monitor_handle = cp->monitor_handle; - if (!status) + if (!status) { + mgmt_adv_monitor_removed(cmd->sk, hdev, cp->monitor_handle); hci_update_passive_scan(hdev); + } mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); hci_dev_unlock(hdev); bt_dev_dbg(hdev, "remove monitor %d complete, status %d", @@ -5423,10 +5413,6 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, static int mgmt_remove_adv_monitor_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - - if (cmd != pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev)) - return -ECANCELED; - struct mgmt_cp_remove_adv_monitor *cp = cmd->param; u16 handle = __le16_to_cpu(cp->monitor_handle); @@ -5445,14 +5431,13 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); if (pending_find(MGMT_OP_SET_LE, hdev) || - pending_find(MGMT_OP_REMOVE_ADV_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR, hdev) || pending_find(MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, hdev)) { status = MGMT_STATUS_BUSY; goto unlock; } - cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); + cmd = mgmt_pending_new(sk, MGMT_OP_REMOVE_ADV_MONITOR, hdev, data, len); if (!cmd) { status = MGMT_STATUS_NO_RESOURCES; goto unlock; @@ -5462,7 +5447,7 @@ static int remove_adv_monitor(struct sock *sk, struct hci_dev *hdev, mgmt_remove_adv_monitor_complete); if (err) { - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); if (err == -ENOMEM) status = MGMT_STATUS_NO_RESOURCES; From 6fe26f694c824b8a4dbf50c635bee1302e3f099c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 20 May 2025 15:42:21 -0400 Subject: [PATCH 025/497] Bluetooth: MGMT: Protect mgmt_pending list with its own lock This uses a mutex to protect from concurrent access of mgmt_pending list which can cause crashes like: ================================================================== BUG: KASAN: slab-use-after-free in hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 Read of size 2 at addr ffff0000c48885b2 by task syz.4.334/7318 CPU: 0 UID: 0 PID: 7318 Comm: syz.4.334 Not tainted 6.15.0-rc7-syzkaller-g187899f4124a #0 PREEMPT Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call trace: show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack+0x30/0x40 lib/dump_stack.c:94 dump_stack_lvl+0xd8/0x12c lib/dump_stack.c:120 print_address_description+0xa8/0x254 mm/kasan/report.c:408 print_report+0x68/0x84 mm/kasan/report.c:521 kasan_report+0xb0/0x110 mm/kasan/report.c:634 __asan_report_load2_noabort+0x20/0x2c mm/kasan/report_generic.c:379 hci_sock_get_channel+0x60/0x68 net/bluetooth/hci_sock.c:91 mgmt_pending_find+0x7c/0x140 net/bluetooth/mgmt_util.c:223 pending_find net/bluetooth/mgmt.c:947 [inline] remove_adv_monitor+0x44/0x1a4 net/bluetooth/mgmt.c:5445 hci_mgmt_cmd+0x780/0xc00 net/bluetooth/hci_sock.c:1712 hci_sock_sendmsg+0x544/0xbb0 net/bluetooth/hci_sock.c:1832 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg net/socket.c:727 [inline] sock_write_iter+0x25c/0x378 net/socket.c:1131 new_sync_write fs/read_write.c:591 [inline] vfs_write+0x62c/0x97c fs/read_write.c:684 ksys_write+0x120/0x210 fs/read_write.c:736 __do_sys_write fs/read_write.c:747 [inline] __se_sys_write fs/read_write.c:744 [inline] __arm64_sys_write+0x7c/0x90 fs/read_write.c:744 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Allocated by task 7037: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x44/0x54 mm/kasan/generic.c:562 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0x9c/0xb4 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4327 [inline] __kmalloc_noprof+0x2fc/0x4c8 mm/slub.c:4339 kmalloc_noprof include/linux/slab.h:909 [inline] sk_prot_alloc+0xc4/0x1f0 net/core/sock.c:2198 sk_alloc+0x44/0x3ac net/core/sock.c:2254 bt_sock_alloc+0x4c/0x300 net/bluetooth/af_bluetooth.c:148 hci_sock_create+0xa8/0x194 net/bluetooth/hci_sock.c:2202 bt_sock_create+0x14c/0x24c net/bluetooth/af_bluetooth.c:132 __sock_create+0x43c/0x91c net/socket.c:1541 sock_create net/socket.c:1599 [inline] __sys_socket_create net/socket.c:1636 [inline] __sys_socket+0xd4/0x1c0 net/socket.c:1683 __do_sys_socket net/socket.c:1697 [inline] __se_sys_socket net/socket.c:1695 [inline] __arm64_sys_socket+0x7c/0x94 net/socket.c:1695 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Freed by task 6607: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_free_info+0x58/0x70 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x68/0x88 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2380 [inline] slab_free mm/slub.c:4642 [inline] kfree+0x17c/0x474 mm/slub.c:4841 sk_prot_free net/core/sock.c:2237 [inline] __sk_destruct+0x4f4/0x760 net/core/sock.c:2332 sk_destruct net/core/sock.c:2360 [inline] __sk_free+0x320/0x430 net/core/sock.c:2371 sk_free+0x60/0xc8 net/core/sock.c:2382 sock_put include/net/sock.h:1944 [inline] mgmt_pending_free+0x88/0x118 net/bluetooth/mgmt_util.c:290 mgmt_pending_remove+0xec/0x104 net/bluetooth/mgmt_util.c:298 mgmt_set_powered_complete+0x418/0x5cc net/bluetooth/mgmt.c:1355 hci_cmd_sync_work+0x204/0x33c net/bluetooth/hci_sync.c:334 process_one_work+0x7e8/0x156c kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x958/0xed8 kernel/workqueue.c:3400 kthread+0x5fc/0x75c kernel/kthread.c:464 ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:847 Fixes: a380b6cff1a2 ("Bluetooth: Add generic mgmt helper API") Closes: https://syzkaller.appspot.com/bug?extid=0a7039d5d9986ff4ecec Closes: https://syzkaller.appspot.com/bug?extid=cc0cc52e7f43dc9e6df1 Reported-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+0a7039d5d9986ff4ecec@syzkaller.appspotmail.com Tested-by: syzbot+cc0cc52e7f43dc9e6df1@syzkaller.appspotmail.com Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 1 + net/bluetooth/mgmt.c | 101 +++++++++++++++---------------- net/bluetooth/mgmt_util.c | 32 ++++++++-- net/bluetooth/mgmt_util.h | 4 +- 5 files changed, 80 insertions(+), 59 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 93fcb659f0d4..f7b1a9eb9543 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -546,6 +546,7 @@ struct hci_dev { struct hci_conn_hash conn_hash; struct list_head mesh_pending; + struct mutex mgmt_pending_lock; struct list_head mgmt_pending; struct list_head reject_list; struct list_head accept_list; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index aeda2e4557d5..487c045a7ba8 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2485,6 +2485,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + mutex_init(&hdev->mgmt_pending_lock); ida_init(&hdev->unset_handle_ida); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index feaeec2423ae..de7adb9a47f9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1447,22 +1447,17 @@ static void settings_rsp(struct mgmt_pending_cmd *cmd, void *data) send_settings_rsp(cmd->sk, cmd->opcode, match->hdev); - list_del(&cmd->list); - if (match->sk == NULL) { match->sk = cmd->sk; sock_hold(match->sk); } - - mgmt_pending_free(cmd); } static void cmd_status_rsp(struct mgmt_pending_cmd *cmd, void *data) { u8 *status = data; - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, *status); - mgmt_pending_remove(cmd); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, *status); } static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) @@ -1476,8 +1471,6 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) if (cmd->cmd_complete) { cmd->cmd_complete(cmd, match->mgmt_status); - mgmt_pending_remove(cmd); - return; } @@ -1486,13 +1479,13 @@ static void cmd_complete_rsp(struct mgmt_pending_cmd *cmd, void *data) static int generic_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, cmd->param_len); } static int addr_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) { - return mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, + return mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, cmd->param, sizeof(struct mgmt_addr_info)); } @@ -1532,7 +1525,7 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); goto done; } @@ -1707,7 +1700,7 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -1943,8 +1936,8 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) new_settings(hdev, NULL); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, cmd_status_rsp, - &mgmt_err); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, + cmd_status_rsp, &mgmt_err); return; } @@ -1954,7 +1947,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) changed = hci_dev_test_and_clear_flag(hdev, HCI_SSP_ENABLED); } - mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_SSP, hdev, true, settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -2074,12 +2067,12 @@ static void set_le_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp, - &status); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, cmd_status_rsp, + &status); return; } - mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -2138,7 +2131,7 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) struct sock *sk = cmd->sk; if (status) { - mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, + mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); return; } @@ -2638,7 +2631,7 @@ static void mgmt_class_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), hdev->dev_class, 3); mgmt_pending_free(cmd); @@ -3427,7 +3420,7 @@ static int pairing_complete(struct mgmt_pending_cmd *cmd, u8 status) bacpy(&rp.addr.bdaddr, &conn->dst); rp.addr.type = link_to_bdaddr(conn->type, conn->dst_type); - err = mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_PAIR_DEVICE, + err = mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_PAIR_DEVICE, status, &rp, sizeof(rp)); /* So we don't get further callbacks for this connection */ @@ -5186,7 +5179,7 @@ static void mgmt_add_adv_patterns_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_remove(cmd); @@ -5401,7 +5394,7 @@ static void mgmt_remove_adv_monitor_complete(struct hci_dev *hdev, hci_update_passive_scan(hdev); } - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(status), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -5777,7 +5770,7 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) return; - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -5998,7 +5991,7 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6223,7 +6216,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) u8 status = mgmt_status(err); if (status) { - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, cmd_status_rsp, &status); return; } @@ -6233,7 +6226,7 @@ static void set_advertising_complete(struct hci_dev *hdev, void *data, int err) else hci_dev_clear_flag(hdev, HCI_ADVERTISING); - mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, settings_rsp, + mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev, true, settings_rsp, &match); new_settings(hdev, match.sk); @@ -6577,7 +6570,7 @@ static void set_bredr_complete(struct hci_dev *hdev, void *data, int err) */ hci_dev_clear_flag(hdev, HCI_BREDR_ENABLED); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); } else { send_settings_rsp(cmd->sk, MGMT_OP_SET_BREDR, hdev); new_settings(hdev, cmd->sk); @@ -6714,7 +6707,7 @@ static void set_secure_conn_complete(struct hci_dev *hdev, void *data, int err) if (err) { u8 mgmt_err = mgmt_status(err); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, mgmt_err); + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_err); goto done; } @@ -7161,7 +7154,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err) rp.max_tx_power = HCI_TX_POWER_INVALID; } - mgmt_cmd_complete(cmd->sk, cmd->index, MGMT_OP_GET_CONN_INFO, status, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, MGMT_OP_GET_CONN_INFO, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -7321,7 +7314,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, void *data, int err) } complete: - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, status, &rp, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, status, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -8571,10 +8564,10 @@ static void add_advertising_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); add_adv_complete(hdev, cmd->sk, cp->instance, err); @@ -8762,10 +8755,10 @@ static void add_ext_adv_params_complete(struct hci_dev *hdev, void *data, hci_remove_adv_instance(hdev, cp->instance); - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); } else { - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); } @@ -8912,10 +8905,10 @@ static void add_ext_adv_data_complete(struct hci_dev *hdev, void *data, int err) rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err), &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9074,10 +9067,10 @@ static void remove_advertising_complete(struct hci_dev *hdev, void *data, rp.instance = cp->instance; if (err) - mgmt_cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_status(cmd->sk, cmd->hdev->id, cmd->opcode, mgmt_status(err)); else - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, + mgmt_cmd_complete(cmd->sk, cmd->hdev->id, cmd->opcode, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); mgmt_pending_free(cmd); @@ -9349,7 +9342,7 @@ void mgmt_index_removed(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) return; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { mgmt_index_event(MGMT_EV_UNCONF_INDEX_REMOVED, hdev, NULL, 0, @@ -9387,7 +9380,8 @@ void mgmt_power_on(struct hci_dev *hdev, int err) hci_update_passive_scan(hdev); } - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); new_settings(hdev, match.sk); @@ -9402,7 +9396,8 @@ void __mgmt_power_off(struct hci_dev *hdev) struct cmd_lookup match = { NULL, hdev }; u8 zero_cod[] = { 0, 0, 0 }; - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); + mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, true, settings_rsp, + &match); /* If the power off is because of hdev unregistration let * use the appropriate INVALID_INDEX status. Otherwise use @@ -9416,7 +9411,7 @@ void __mgmt_power_off(struct hci_dev *hdev) else match.mgmt_status = MGMT_STATUS_NOT_POWERED; - mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &match); + mgmt_pending_foreach(0, hdev, true, cmd_complete_rsp, &match); if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, @@ -9657,7 +9652,6 @@ static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) device_unpaired(hdev, &cp->addr.bdaddr, cp->addr.type, cmd->sk); cmd->cmd_complete(cmd, 0); - mgmt_pending_remove(cmd); } bool mgmt_powering_down(struct hci_dev *hdev) @@ -9713,8 +9707,8 @@ void mgmt_disconnect_failed(struct hci_dev *hdev, bdaddr_t *bdaddr, struct mgmt_cp_disconnect *cp; struct mgmt_pending_cmd *cmd; - mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, - hdev); + mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, true, + unpair_device_rsp, hdev); cmd = pending_find(MGMT_OP_DISCONNECT, hdev); if (!cmd) @@ -9907,7 +9901,7 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) if (status) { u8 mgmt_err = mgmt_status(status); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, cmd_status_rsp, &mgmt_err); return; } @@ -9917,8 +9911,8 @@ void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status) else changed = hci_dev_test_and_clear_flag(hdev, HCI_LINK_SECURITY); - mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, settings_rsp, - &match); + mgmt_pending_foreach(MGMT_OP_SET_LINK_SECURITY, hdev, true, + settings_rsp, &match); if (changed) new_settings(hdev, match.sk); @@ -9942,9 +9936,12 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, { struct cmd_lookup match = { NULL, hdev, mgmt_status(status) }; - mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); - mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); + mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, false, sk_lookup, + &match); + mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, false, sk_lookup, + &match); if (!status) { mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 3713ff490c65..a88a07da3947 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -217,30 +217,47 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev) { - struct mgmt_pending_cmd *cmd; + struct mgmt_pending_cmd *cmd, *tmp; - list_for_each_entry(cmd, &hdev->mgmt_pending, list) { + mutex_lock(&hdev->mgmt_pending_lock); + + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (hci_sock_get_channel(cmd->sk) != channel) continue; - if (cmd->opcode == opcode) + + if (cmd->opcode == opcode) { + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; + } } + mutex_unlock(&hdev->mgmt_pending_lock); + return NULL; } -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data) { struct mgmt_pending_cmd *cmd, *tmp; + mutex_lock(&hdev->mgmt_pending_lock); + list_for_each_entry_safe(cmd, tmp, &hdev->mgmt_pending, list) { if (opcode > 0 && cmd->opcode != opcode) continue; + if (remove) + list_del(&cmd->list); + cb(cmd, data); + + if (remove) + mgmt_pending_free(cmd); } + + mutex_unlock(&hdev->mgmt_pending_lock); } struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, @@ -254,7 +271,7 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, return NULL; cmd->opcode = opcode; - cmd->index = hdev->id; + cmd->hdev = hdev; cmd->param = kmemdup(data, len, GFP_KERNEL); if (!cmd->param) { @@ -280,7 +297,9 @@ struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, if (!cmd) return NULL; + mutex_lock(&hdev->mgmt_pending_lock); list_add_tail(&cmd->list, &hdev->mgmt_pending); + mutex_unlock(&hdev->mgmt_pending_lock); return cmd; } @@ -294,7 +313,10 @@ void mgmt_pending_free(struct mgmt_pending_cmd *cmd) void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) { + mutex_lock(&cmd->hdev->mgmt_pending_lock); list_del(&cmd->list); + mutex_unlock(&cmd->hdev->mgmt_pending_lock); + mgmt_pending_free(cmd); } diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index f2ba994ab1d8..024e51dd6937 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -33,7 +33,7 @@ struct mgmt_mesh_tx { struct mgmt_pending_cmd { struct list_head list; u16 opcode; - int index; + struct hci_dev *hdev; void *param; size_t param_len; struct sock *sk; @@ -54,7 +54,7 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, struct mgmt_pending_cmd *mgmt_pending_find(unsigned short channel, u16 opcode, struct hci_dev *hdev); -void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, +void mgmt_pending_foreach(u16 opcode, struct hci_dev *hdev, bool remove, void (*cb)(struct mgmt_pending_cmd *cmd, void *data), void *data); struct mgmt_pending_cmd *mgmt_pending_add(struct sock *sk, u16 opcode, From 692eb9f8a5b71d852e873375d20cf5da7a046ea6 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 2 Jun 2025 21:49:14 +0200 Subject: [PATCH 026/497] net: dsa: b53: fix untagged traffic sent via cpu tagged with VID 0 When Linux sends out untagged traffic from a port, it will enter the CPU port without any VLAN tag, even if the port is a member of a vlan filtering bridge with a PVID egress untagged VLAN. This makes the CPU port's PVID take effect, and the PVID's VLAN table entry controls if the packet will be tagged on egress. Since commit 45e9d59d3950 ("net: dsa: b53: do not allow to configure VLAN 0") we remove bridged ports from VLAN 0 when joining or leaving a VLAN aware bridge. But we also clear the untagged bit, causing untagged traffic from the controller to become tagged with VID 0 (and priority 0). Fix this by not touching the untagged map of VLAN 0. Additionally, always keep the CPU port as a member, as the untag map is only effective as long as there is at least one member, and we would remove it when bridging all ports and leaving no standalone ports. Since Linux (and the switch) treats VLAN 0 tagged traffic like untagged, the actual impact of this is rather low, but this also prevented earlier detection of the issue. Fixes: 45e9d59d3950 ("net: dsa: b53: do not allow to configure VLAN 0") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20250602194914.1011890-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 862bdccb7439..dc2f4adac9bc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2034,9 +2034,6 @@ int b53_br_join(struct dsa_switch *ds, int port, struct dsa_bridge bridge, b53_get_vlan_entry(dev, pvid, vl); vl->members &= ~BIT(port); - if (vl->members == BIT(cpu_port)) - vl->members &= ~BIT(cpu_port); - vl->untag = vl->members; b53_set_vlan_entry(dev, pvid, vl); } @@ -2115,8 +2112,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge) } b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(cpu_port); - vl->untag |= BIT(port) | BIT(cpu_port); + vl->members |= BIT(port); b53_set_vlan_entry(dev, pvid, vl); } } From 87f7ce260a3c838b49e1dc1ceedf1006795157a2 Mon Sep 17 00:00:00 2001 From: Jeongjun Park Date: Wed, 21 May 2025 01:07:17 +0900 Subject: [PATCH 027/497] ptp: remove ptp->n_vclocks check logic in ptp_vclock_in_use() There is no disagreement that we should check both ptp->is_virtual_clock and ptp->n_vclocks to check if the ptp virtual clock is in use. However, when we acquire ptp->n_vclocks_mux to read ptp->n_vclocks in ptp_vclock_in_use(), we observe a recursive lock in the call trace starting from n_vclocks_store(). ============================================ WARNING: possible recursive locking detected 6.15.0-rc6 #1 Not tainted -------------------------------------------- syz.0.1540/13807 is trying to acquire lock: ffff888035a24868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: ptp_vclock_in_use drivers/ptp/ptp_private.h:103 [inline] ffff888035a24868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: ptp_clock_unregister+0x21/0x250 drivers/ptp/ptp_clock.c:415 but task is already holding lock: ffff888030704868 (&ptp->n_vclocks_mux){+.+.}-{4:4}, at: n_vclocks_store+0xf1/0x6d0 drivers/ptp/ptp_sysfs.c:215 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&ptp->n_vclocks_mux); lock(&ptp->n_vclocks_mux); *** DEADLOCK *** .... ============================================ The best way to solve this is to remove the logic that checks ptp->n_vclocks in ptp_vclock_in_use(). The reason why this is appropriate is that any path that uses ptp->n_vclocks must unconditionally check if ptp->n_vclocks is greater than 0 before unregistering vclocks, and all functions are already written this way. And in the function that uses ptp->n_vclocks, we already get ptp->n_vclocks_mux before unregistering vclocks. Therefore, we need to remove the redundant check for ptp->n_vclocks in ptp_vclock_in_use() to prevent recursive locking. Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Jeongjun Park Acked-by: Richard Cochran Link: https://patch.msgid.link/20250520160717.7350-1-aha310510@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_private.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 18934e28469e..528d86a33f37 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -98,17 +98,7 @@ static inline int queue_cnt(const struct timestamp_event_queue *q) /* Check if ptp virtual clock is in use */ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) { - bool in_use = false; - - if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) - return true; - - if (!ptp->is_virtual_clock && ptp->n_vclocks) - in_use = true; - - mutex_unlock(&ptp->n_vclocks_mux); - - return in_use; + return !ptp->is_virtual_clock; } /* Check if ptp clock shall be free running */ From 800d0b9b6a8b1b354637b4194cc167ad1ce2bdd3 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 5 Jun 2025 12:51:16 -0400 Subject: [PATCH 028/497] fs/xattr.c: fix simple_xattr_list() commit 8b0ba61df5a1 ("fs/xattr.c: fix simple_xattr_list to always include security.* xattrs") failed to reset err after the call to security_inode_listsecurity(), which returns the length of the returned xattr name. This results in simple_xattr_list() incorrectly returning this length even if a POSIX acl is also set on the inode. Reported-by: Collin Funk Closes: https://lore.kernel.org/selinux/8734ceal7q.fsf@gmail.com/ Reported-by: Paul Eggert Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2369561 Fixes: 8b0ba61df5a1 ("fs/xattr.c: fix simple_xattr_list to always include security.* xattrs") Signed-off-by: Stephen Smalley Link: https://lore.kernel.org/20250605165116.2063-1-stephen.smalley.work@gmail.com Signed-off-by: Christian Brauner --- fs/xattr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xattr.c b/fs/xattr.c index 8ec5b0204bfd..600ae97969cf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1479,6 +1479,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, buffer += err; } remaining_size -= err; + err = 0; read_lock(&xattrs->lock); for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { From 82cbd06f327f3c2ccdee990bd356c9303ae168f9 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 5 Jun 2025 14:08:36 +0800 Subject: [PATCH 029/497] net: enetc: fix the netc-lib driver build dependency The kernel robot reported the following errors when the netc-lib driver was compiled as a loadable module and the enetc-core driver was built-in. ld.lld: error: undefined symbol: ntmp_init_cbdr referenced by enetc_cbdr.c:88 (drivers/net/ethernet/freescale/enetc/enetc_cbdr.c:88) ld.lld: error: undefined symbol: ntmp_free_cbdr referenced by enetc_cbdr.c:96 (drivers/net/ethernet/freescale/enetc/enetc_cbdr.c:96) Simply changing "tristate" to "bool" can fix this issue, but considering that the netc-lib driver needs to support being compiled as a loadable module and LS1028 does not need the netc-lib driver. Therefore, we add a boolean symbol 'NXP_NTMP' to enable 'NXP_NETC_LIB' as needed. And when adding NETC switch driver support in the future, there is no need to modify the dependency, just select "NXP_NTMP" and "NXP_NETC_LIB" at the same time. Reported-by: Arnd Bergmann Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505220734.x6TF6oHR-lkp@intel.com/ Fixes: 4701073c3deb ("net: enetc: add initial netc-lib driver to support NTMP") Suggested-by: Arnd Bergmann Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index e917132d3714..54b0f0a5a6bb 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 config FSL_ENETC_CORE tristate + select NXP_NETC_LIB if NXP_NTMP help This module supports common functionality between the PF and VF drivers for the NXP ENETC controller. @@ -22,6 +23,9 @@ config NXP_NETC_LIB Switch, such as NETC Table Management Protocol (NTMP) 2.0, common tc flower and debugfs interfaces and so on. +config NXP_NTMP + bool + config FSL_ENETC tristate "ENETC PF driver" depends on PCI_MSI @@ -45,7 +49,7 @@ config NXP_ENETC4 select FSL_ENETC_CORE select FSL_ENETC_MDIO select NXP_ENETC_PF_COMMON - select NXP_NETC_LIB + select NXP_NTMP select PHYLINK select DIMLIB help From 11fcf368506d347088e613edf6cd2604d70c454f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 6 Jun 2025 10:23:57 +0200 Subject: [PATCH 030/497] uapi: bitops: use UAPI-safe variant of BITS_PER_LONG again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") did not take in account that the usage of BITS_PER_LONG in __GENMASK() was changed to __BITS_PER_LONG for UAPI-safety in commit 3c7a8e190bc5 ("uapi: introduce uapi-friendly macros for GENMASK"). BITS_PER_LONG can not be used in UAPI headers as it derives from the kernel configuration and not from the current compiler invocation. When building compat userspace code or a compat vDSO its value will be incorrect. Switch back to __BITS_PER_LONG. Fixes: 1e7933a575ed ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Signed-off-by: Yury Norov [NVIDIA] --- include/uapi/linux/bits.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) From c0f691388992c708436ab5f6e810865be6ddf5c6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:04:11 +0200 Subject: [PATCH 031/497] intel_idle: Use subsys_initcall_sync() for initialization It is not necessary to wait until the device_initcall() stage with intel_idle initialization. All of its dependencies are met after all subsys_initcall()s have run, so subsys_initcall_sync() can be used for initializing it. It is also better to ensure that intel_idle will always initialize before the ACPI processor driver that uses module_init() for its initialization. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/2994397.e9J7NaK4W3@rjwysocki.net --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 8ccb483204fa..64ac4da08094 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2518,7 +2518,7 @@ static int __init intel_idle_init(void) return retval; } -device_initcall(intel_idle_init); +subsys_initcall_sync(intel_idle_init); /* * We are not really modular, but we used to support that. Meaning we also From 02670deede2288d8e4e3d800477b27c091080fae Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 6 Jun 2025 13:21:34 -0700 Subject: [PATCH 032/497] libbpf: Handle unsupported mmap-based /sys/kernel/btf/vmlinux correctly libbpf_err_ptr() helpers are meant to return NULL and set errno, if there is an error. But btf_parse_raw_mmap() is meant to be used internally and is expected to return ERR_PTR() values. Because of this mismatch, when libbpf tries to mmap /sys/kernel/btf/vmlinux, we don't detect the error correctly with IS_ERR() check, and never fallback to old non-mmap-based way of loading vmlinux BTF. Fix this by using proper ERR_PTR() returns internally. Reported-by: Arnaldo Carvalho de Melo Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Fixes: 3c0421c93ce4 ("libbpf: Use mmap to parse vmlinux BTF from sysfs") Cc: Lorenz Bauer Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20250606202134.2738910-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index f1d495dc66bb..37682908cb0f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1384,12 +1384,12 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) fd = open(path, O_RDONLY); if (fd < 0) - return libbpf_err_ptr(-errno); + return ERR_PTR(-errno); if (fstat(fd, &st) < 0) { err = -errno; close(fd); - return libbpf_err_ptr(err); + return ERR_PTR(err); } data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); @@ -1397,7 +1397,7 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) close(fd); if (data == MAP_FAILED) - return libbpf_err_ptr(err); + return ERR_PTR(err); btf = btf_new(data, st.st_size, base_btf, true); if (IS_ERR(btf)) From 4c529a4a7260776bb4abe264498857b4537aa70d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 7 Jun 2025 14:22:56 +0200 Subject: [PATCH 033/497] x86/smp: PM/hibernate: Split arch_resume_nosmt() Move the inner part of the arch_resume_nosmt() code into a separate function called arch_cpu_rescan_dead_smt_siblings(), so it can be used in other places where "dead" SMT siblings may need to be taken online and offline again in order to get into deep idle states. No intentional functional impact. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/3361688.44csPzL39Z@rjwysocki.net [ rjw: Prevent build issues with CONFIG_SMP unset ] Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/smp.c | 24 ++++++++++++++++++++++++ arch/x86/power/hibernate.c | 17 +++++------------ include/linux/cpu.h | 3 +++ 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18266cc3d98c..b014e6d229f9 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -299,3 +299,27 @@ struct smp_ops smp_ops = { .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); + +int arch_cpu_rescan_dead_smt_siblings(void) +{ + enum cpuhp_smt_control old = cpu_smt_control; + int ret; + + /* + * If SMT has been disabled and SMT siblings are in HLT, bring them back + * online and offline them again so that they end up in MWAIT proper. + * + * Called with hotplug enabled. + */ + if (old != CPU_SMT_DISABLED && old != CPU_SMT_FORCE_DISABLED) + return 0; + + ret = cpuhp_smt_enable(); + if (ret) + return ret; + + ret = cpuhp_smt_disable(old); + + return ret; +} +EXPORT_SYMBOL_GPL(arch_cpu_rescan_dead_smt_siblings); diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index a7c23f2a58c9..a2294c1649f6 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -192,7 +192,8 @@ int relocate_restore_code(void) int arch_resume_nosmt(void) { - int ret = 0; + int ret; + /* * We reached this while coming out of hibernation. This means * that SMT siblings are sleeping in hlt, as mwait is not safe @@ -206,18 +207,10 @@ int arch_resume_nosmt(void) * Called with hotplug disabled. */ cpu_hotplug_enable(); - if (cpu_smt_control == CPU_SMT_DISABLED || - cpu_smt_control == CPU_SMT_FORCE_DISABLED) { - enum cpuhp_smt_control old = cpu_smt_control; - ret = cpuhp_smt_enable(); - if (ret) - goto out; - ret = cpuhp_smt_disable(old); - if (ret) - goto out; - } -out: + ret = arch_cpu_rescan_dead_smt_siblings(); + cpu_hotplug_disable(); + return ret; } diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e6089abc28e2..96a3a0d6a60e 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -120,6 +120,7 @@ extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); int bringup_hibernate_cpu(unsigned int sleep_cpu); void bringup_nonboot_cpus(unsigned int max_cpus); +int arch_cpu_rescan_dead_smt_siblings(void); #else /* CONFIG_SMP */ #define cpuhp_tasks_frozen 0 @@ -134,6 +135,8 @@ static inline void cpu_maps_update_done(void) static inline int add_cpu(unsigned int cpu) { return 0;} +static inline int arch_cpu_rescan_dead_smt_siblings(void) { return 0; } + #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; From a430c11f401589a0f4f57fd398271a5d85142c7a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:06:08 +0200 Subject: [PATCH 034/497] intel_idle: Rescan "dead" SMT siblings during initialization Make intel_idle_init() call arch_cpu_rescan_dead_smt_siblings() after successfully registering intel_idle as the cpuidle driver so as to allow the "dead" SMT siblings (if any) to go into deep idle states. This is necessary for the processor to be able to reach deep package C-states (like PC10) going forward which is requisite for reducing power sufficiently in suspend-to-idle, among other things. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/10669885.nUPlyArG6x@rjwysocki.net --- drivers/idle/intel_idle.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 64ac4da08094..63565814c7e5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -2507,6 +2507,8 @@ static int __init intel_idle_init(void) pr_debug("Local APIC timer is reliable in %s\n", boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1"); + arch_cpu_rescan_dead_smt_siblings(); + return 0; hp_setup_fail: From f694481b1d3177144fcac4242eb750cfcb9f7bd5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:07:31 +0200 Subject: [PATCH 035/497] ACPI: processor: Rescan "dead" SMT siblings during initialization Make acpi_processor_driver_init() call arch_cpu_rescan_dead_smt_siblings(), via a new wrapper function called acpi_idle_rescan_dead_smt_siblings(), after successfully initializing the driver, to allow the "dead" SMT siblings to go into deep idle states, which is necessary for the processor to be able to reach deep package C-states (like PC10) going forward, so that power can be reduced sufficiently in suspend-to-idle, among other things. However, do it only if the ACPI idle driver is the current cpuidle driver (otherwise it is assumed that another cpuidle driver will take care of this) and avoid doing it on architectures other than x86. Signed-off-by: Rafael J. Wysocki Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/2005721.PYKUYFuaPT@rjwysocki.net --- drivers/acpi/internal.h | 6 ++++++ drivers/acpi/processor_driver.c | 3 +++ drivers/acpi/processor_idle.c | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 00910ccd7eda..e2781864fdce 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -175,6 +175,12 @@ bool processor_physically_present(acpi_handle handle); static inline void acpi_early_processor_control_setup(void) {} #endif +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void); +#else +static inline void acpi_idle_rescan_dead_smt_siblings(void) {} +#endif + /* -------------------------------------------------------------------------- Embedded Controller -------------------------------------------------------------------------- */ diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 3b281bc1e73c..65e779be64ff 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -279,6 +279,9 @@ static int __init acpi_processor_driver_init(void) * after acpi_cppc_processor_probe() has been called for all online CPUs */ acpi_processor_init_invariance_cppc(); + + acpi_idle_rescan_dead_smt_siblings(); + return 0; err: driver_unregister(&acpi_processor_driver); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index e2febca2ec13..2c2dc559e0f8 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -24,6 +24,8 @@ #include #include +#include "internal.h" + /* * Include the apic definitions for x86 to have the APIC timer related defines * available also for UP (on SMP it gets magically included via linux/smp.h). @@ -55,6 +57,12 @@ struct cpuidle_driver acpi_idle_driver = { }; #ifdef CONFIG_ACPI_PROCESSOR_CSTATE +void acpi_idle_rescan_dead_smt_siblings(void) +{ + if (cpuidle_get_driver() == &acpi_idle_driver) + arch_cpu_rescan_dead_smt_siblings(); +} + static DEFINE_PER_CPU(struct acpi_processor_cx * [CPUIDLE_STATE_MAX], acpi_cstate); From a18d098f2aab82abde1d59c56aef9beca01c892b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jun 2025 17:09:35 +0200 Subject: [PATCH 036/497] Reapply "x86/smp: Eliminate mwait_play_dead_cpuid_hint()" Revert commit 70523f335734 ("Revert "x86/smp: Eliminate mwait_play_dead_cpuid_hint()"") to reapply the changes from commit 96040f7273e2 ("x86/smp: Eliminate mwait_play_dead_cpuid_hint()") reverted by it. Previously, these changes caused idle power to rise on systems booting with "nosmt" in the kernel command line because they effectively caused "dead" SMT siblings to remain in idle state C1 after executing the HLT instruction, which prevented the processor from reaching package idle states deeper than PC2 going forward. Now, the "dead" SMT siblings are rescanned after initializing a proper cpuidle driver for the processor (either intel_idle or ACPI idle), at which point they are able to enter a sufficiently deep idle state in native_play_dead() via cpuidle, so the code changes in question can be reapplied. Signed-off-by: Rafael J. Wysocki Acked-by: Dave Hansen Tested-by: Artem Bityutskiy Link: https://patch.msgid.link/7813065.EvYhyI6sBW@rjwysocki.net --- arch/x86/kernel/smpboot.c | 54 +++++---------------------------------- 1 file changed, 7 insertions(+), 47 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fc78c2325fd2..58ede3fa6a75 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1244,6 +1244,10 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ void __noreturn mwait_play_dead(unsigned int eax_hint) { struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead); @@ -1289,50 +1293,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint) } } -/* - * We need to flush the caches before going to sleep, lest we have - * dirty data in our caches when we come back up. - */ -static inline void mwait_play_dead_cpuid_hint(void) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int highest_cstate = 0; - unsigned int highest_subcstate = 0; - int i; - - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) - return; - if (!this_cpu_has(X86_FEATURE_MWAIT)) - return; - if (!this_cpu_has(X86_FEATURE_CLFLUSH)) - return; - - eax = CPUID_LEAF_MWAIT; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - - /* - * eax will be 0 if EDX enumeration is not valid. - * Initialized below to cstate, sub_cstate value when EDX is valid. - */ - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { - eax = 0; - } else { - edx >>= MWAIT_SUBSTATE_SIZE; - for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { - if (edx & MWAIT_SUBSTATE_MASK) { - highest_cstate = i; - highest_subcstate = edx & MWAIT_SUBSTATE_MASK; - } - } - eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | - (highest_subcstate - 1); - } - - mwait_play_dead(eax); -} - /* * Kick all "offline" CPUs out of mwait on kexec(). See comment in * mwait_play_dead(). @@ -1383,9 +1343,9 @@ void native_play_dead(void) play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - mwait_play_dead_cpuid_hint(); - if (cpuidle_play_dead()) - hlt_play_dead(); + /* Below returns only on error. */ + cpuidle_play_dead(); + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ From 1650d32b92b01db03a1a95d69ee74fcbc34d4b00 Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Tue, 18 Mar 2025 20:50:27 +0000 Subject: [PATCH 037/497] ath10k: snoc: fix unbalanced IRQ enable in crash recovery In ath10k_snoc_hif_stop() we skip disabling the IRQs in the crash recovery flow, but we still unconditionally call enable again in ath10k_snoc_hif_start(). We can't check the ATH10K_FLAG_CRASH_FLUSH bit since it is cleared before hif_start() is called, so instead check the ATH10K_SNOC_FLAG_RECOVERY flag and skip enabling the IRQs during crash recovery. This fixes unbalanced IRQ enable splats that happen after recovering from a crash. Fixes: 0e622f67e041 ("ath10k: add support for WCN3990 firmware crash recovery") Signed-off-by: Caleb Connolly Tested-by: Loic Poulain Link: https://patch.msgid.link/20250318205043.1043148-1-caleb.connolly@linaro.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/snoc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 866bad2db334..65673b1aba55 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -937,7 +937,9 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) dev_set_threaded(ar->napi_dev, true); ath10k_core_napi_enable(ar); - ath10k_snoc_irq_enable(ar); + /* IRQs are left enabled when we restart due to a firmware crash */ + if (!test_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags)) + ath10k_snoc_irq_enable(ar); ath10k_snoc_rx_post(ar); clear_bit(ATH10K_SNOC_FLAG_RECOVERY, &ar_snoc->flags); From dc9c4252fe0d7a7f1ee904405ea91534277305bf Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Thu, 22 May 2025 15:17:04 +0200 Subject: [PATCH 038/497] wifi: ath10k: Avoid vdev delete timeout when firmware is already down In some scenarios, the firmware may be stopped before the interface is removed, either due to a crash or because the remoteproc (e.g., MPSS) is shut down early during system reboot or shutdown. This leads to a delay during interface teardown, as the driver waits for a vdev delete response that never arrives, eventually timing out. Example (SNOC): $ echo stop > /sys/class/remoteproc/remoteproc0/state [ 71.64] remoteproc remoteproc0: stopped remote processor modem $ reboot [ 74.84] ath10k_snoc c800000.wifi: failed to transmit packet, dropping: -108 [ 74.84] ath10k_snoc c800000.wifi: failed to submit frame: -108 [...] [ 82.39] ath10k_snoc c800000.wifi: Timeout in receiving vdev delete response To avoid this, skip waiting for the vdev delete response if the firmware is already marked as unreachable (`ATH10K_FLAG_CRASH_FLUSH`), similar to how `ath10k_mac_wait_tx_complete()` and `ath10k_vdev_setup_sync()` handle this case. Signed-off-by: Loic Poulain Link: https://patch.msgid.link/20250522131704.612206-1-loic.poulain@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/mac.c | 33 ++++++++++++++++++++------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 8c7ffea0fa44..07fe05384cdf 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4,6 +4,7 @@ * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include "mac.h" @@ -1022,6 +1023,26 @@ static inline int ath10k_vdev_setup_sync(struct ath10k *ar) return ar->last_wmi_vdev_start_status; } +static inline int ath10k_vdev_delete_sync(struct ath10k *ar) +{ + unsigned long time_left; + + lockdep_assert_held(&ar->conf_mutex); + + if (!test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) + return 0; + + if (test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) + return -ESHUTDOWN; + + time_left = wait_for_completion_timeout(&ar->vdev_delete_done, + ATH10K_VDEV_DELETE_TIMEOUT_HZ); + if (time_left == 0) + return -ETIMEDOUT; + + return 0; +} + static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) { struct cfg80211_chan_def *chandef = NULL; @@ -5900,7 +5921,6 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; struct ath10k_vif *arvif = (void *)vif->drv_priv; struct ath10k_peer *peer; - unsigned long time_left; int ret; int i; @@ -5940,13 +5960,10 @@ static void ath10k_remove_interface(struct ieee80211_hw *hw, ath10k_warn(ar, "failed to delete WMI vdev %i: %d\n", arvif->vdev_id, ret); - if (test_bit(WMI_SERVICE_SYNC_DELETE_CMDS, ar->wmi.svc_map)) { - time_left = wait_for_completion_timeout(&ar->vdev_delete_done, - ATH10K_VDEV_DELETE_TIMEOUT_HZ); - if (time_left == 0) { - ath10k_warn(ar, "Timeout in receiving vdev delete response\n"); - goto out; - } + ret = ath10k_vdev_delete_sync(ar); + if (ret) { + ath10k_warn(ar, "Error in receiving vdev delete response: %d\n", ret); + goto out; } /* Some firmware revisions don't notify host about self-peer removal From 593963660919a97a4546acfd706dac93625724f5 Mon Sep 17 00:00:00 2001 From: Sebastian Gottschall Date: Tue, 4 Mar 2025 08:21:31 +0700 Subject: [PATCH 039/497] wil6210: fix support for sparrow chipsets the wil6210 driver irq handling code is unconditionally writing edma irq registers which are supposed to be only used on Talyn chipsets. This however leade to a chipset hang on the older sparrow chipset generation and firmware will not even boot. Fix that by simply checking for edma support before handling these registers. Tested on Netgear R9000 Signed-off-by: Sebastian Gottschall Link: https://patch.msgid.link/20250304012131.25970-2-s.gottschall@dd-wrt.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/wil6210/interrupt.c | 26 ++++++++++++-------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index 67172385a5d6..89d4394cedcf 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -179,9 +179,11 @@ void wil_mask_irq(struct wil6210_priv *wil) wil_dbg_irq(wil, "mask_irq\n"); wil6210_mask_irq_tx(wil); - wil6210_mask_irq_tx_edma(wil); + if (wil->use_enhanced_dma_hw) + wil6210_mask_irq_tx_edma(wil); wil6210_mask_irq_rx(wil); - wil6210_mask_irq_rx_edma(wil); + if (wil->use_enhanced_dma_hw) + wil6210_mask_irq_rx_edma(wil); wil6210_mask_irq_misc(wil, true); wil6210_mask_irq_pseudo(wil); } @@ -190,10 +192,12 @@ void wil_unmask_irq(struct wil6210_priv *wil) { wil_dbg_irq(wil, "unmask_irq\n"); - wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC), - WIL_ICR_ICC_VALUE); - wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC), - WIL_ICR_ICC_VALUE); + if (wil->use_enhanced_dma_hw) { + wil_w(wil, RGF_DMA_EP_RX_ICR + offsetof(struct RGF_ICR, ICC), + WIL_ICR_ICC_VALUE); + wil_w(wil, RGF_DMA_EP_TX_ICR + offsetof(struct RGF_ICR, ICC), + WIL_ICR_ICC_VALUE); + } wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICC), WIL_ICR_ICC_MISC_VALUE); wil_w(wil, RGF_INT_GEN_TX_ICR + offsetof(struct RGF_ICR, ICC), @@ -845,10 +849,12 @@ void wil6210_clear_irq(struct wil6210_priv *wil) offsetof(struct RGF_ICR, ICR)); wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_TX_ICR) + offsetof(struct RGF_ICR, ICR)); - wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) + - offsetof(struct RGF_ICR, ICR)); - wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) + - offsetof(struct RGF_ICR, ICR)); + if (wil->use_enhanced_dma_hw) { + wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_RX_ICR) + + offsetof(struct RGF_ICR, ICR)); + wil_clear32(wil->csr + HOSTADDR(RGF_INT_GEN_TX_ICR) + + offsetof(struct RGF_ICR, ICR)); + } wil_clear32(wil->csr + HOSTADDR(RGF_DMA_EP_MISC_ICR) + offsetof(struct RGF_ICR, ICR)); wmb(); /* make sure write completed */ From 9f6e82d11bb9692a90d20b10f87345598945c803 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:42 +0800 Subject: [PATCH 040/497] wifi: ath11k: avoid burning CPU in ath11k_debugfs_fw_stats_request() We get report [1] that CPU is running a hot loop in ath11k_debugfs_fw_stats_request(): 94.60% 0.00% i3status [kernel.kallsyms] [k] do_syscall_64 | --94.60%--do_syscall_64 | --94.55%--__sys_sendmsg ___sys_sendmsg ____sys_sendmsg netlink_sendmsg netlink_unicast genl_rcv netlink_rcv_skb genl_rcv_msg | --94.55%--genl_family_rcv_msg_dumpit __netlink_dump_start netlink_dump genl_dumpit nl80211_dump_station | --94.55%--ieee80211_dump_station sta_set_sinfo | --94.55%--ath11k_mac_op_sta_statistics ath11k_debugfs_get_fw_stats | --94.55%--ath11k_debugfs_fw_stats_request | |--41.73%--_raw_spin_lock_bh | |--22.74%--__local_bh_enable_ip | |--9.22%--_raw_spin_unlock_bh | --6.66%--srso_alias_safe_ret This is because, if for whatever reason ar->fw_stats_done is not set by ath11k_update_stats_event(), ath11k_debugfs_fw_stats_request() won't yield CPU before an up to 3s timeout. Change to completion mechanism to avoid CPU burning. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Reported-by: Yury Vostrikov Closes: https://lore.kernel.org/all/7324ac7a-8b7a-42a5-aa19-de52138ff638@app.fastmail.com/ # [1] Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-2-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 1 + drivers/net/wireless/ath/ath11k/core.h | 2 +- drivers/net/wireless/ath/ath11k/debugfs.c | 38 +++++++++-------------- drivers/net/wireless/ath/ath11k/mac.c | 2 +- drivers/net/wireless/ath/ath11k/wmi.c | 2 +- 5 files changed, 18 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 2e9f8a5e61e4..c8c5008be7f2 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -990,6 +990,7 @@ void ath11k_fw_stats_init(struct ath11k *ar) INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath11k_fw_stats_free(struct ath11k_fw_stats *stats) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 339d4fca1ed5..41bb81d00189 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -784,7 +784,7 @@ struct ath11k { u8 alpha2[REG_ALPHA2_LEN + 1]; struct ath11k_fw_stats fw_stats; struct completion fw_stats_complete; - bool fw_stats_done; + struct completion fw_stats_done; /* protected by conf_mutex */ bool ps_state_enable; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index bf192529e3fe..1d03e3aab011 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2020 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -96,7 +96,6 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) { spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); spin_unlock_bh(&ar->data_lock); @@ -114,7 +113,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* WMI_REQUEST_PDEV_STAT request has been already processed */ if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); return; } @@ -138,7 +137,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * &ar->fw_stats.vdevs); if (is_end) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); num_vdev = 0; } return; @@ -158,7 +157,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * &ar->fw_stats.bcn); if (is_end) { - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); num_bcn = 0; } } @@ -168,21 +167,15 @@ static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, struct stats_request_params *req_param) { struct ath11k_base *ab = ar->ab; - unsigned long timeout, time_left; + unsigned long time_left; int ret; lockdep_assert_held(&ar->conf_mutex); - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + secs_to_jiffies(3); - ath11k_debugfs_fw_stats_reset(ar); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); @@ -193,21 +186,18 @@ static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, } time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) return -ETIMEDOUT; - for (;;) { - if (time_after(jiffies, timeout)) - break; + /* FW stats can get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); - } return 0; } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 08d7b136851f..9ab501cd4f47 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9390,11 +9390,11 @@ static int ath11k_fw_stats_request(struct ath11k *ar, lockdep_assert_held(&ar->conf_mutex); spin_lock_bh(&ar->data_lock); - ar->fw_stats_done = false; ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); spin_unlock_bh(&ar->data_lock); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); if (ret) { diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index d7f852bebf4a..27cb0bb06b93 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8189,7 +8189,7 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats_done = true; + complete(&ar->fw_stats_done); goto complete; } From 2bcf73b2612dda7432f2c2eaad6679bd291791f2 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:43 +0800 Subject: [PATCH 041/497] wifi: ath11k: don't use static variables in ath11k_debugfs_fw_stats_process() Currently ath11k_debugfs_fw_stats_process() is using static variables to count firmware stat events. Taking num_vdev as an example, if for whatever reason ( say ar->num_started_vdevs is 0 or firmware bug etc.) the following condition (++num_vdev) == total_vdevs_started is not met, is_end is not set thus num_vdev won't be cleared. Next time when firmware stats is requested again, even if everything is working fine, we will fail due to the condition above will never be satisfied. The same applies to num_bcn as well. Change to use non-static counters so that we have a chance to clear them each time firmware stats is requested. Currently only ath11k_fw_stats_request() and ath11k_debugfs_fw_stats_request() are requesting firmware stats, so clear counters there. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: da3a9d3c1576 ("ath11k: refactor debugfs code into debugfs.c") Signed-off-by: Baochen Qiang Acked-by: Kalle Valo Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-3-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.h | 2 ++ drivers/net/wireless/ath/ath11k/debugfs.c | 16 +++++++--------- drivers/net/wireless/ath/ath11k/mac.c | 2 ++ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 41bb81d00189..6b2f207975e3 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -600,6 +600,8 @@ struct ath11k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath11k_dbg_htt_stats { diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index 1d03e3aab011..27c93c0b4c22 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -98,6 +98,8 @@ static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) spin_lock_bh(&ar->data_lock); ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); } @@ -106,7 +108,6 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * struct ath11k_base *ab = ar->ab; struct ath11k_pdev *pdev; bool is_end; - static unsigned int num_vdev, num_bcn; size_t total_vdevs_started = 0; int i; @@ -131,15 +132,14 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * total_vdevs_started += ar->num_started_vdevs; } - is_end = ((++num_vdev) == total_vdevs_started); + is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_vdev = 0; - } + return; } @@ -151,15 +151,13 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++num_bcn) == ar->num_started_vdevs); + is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_bcn = 0; - } } } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 9ab501cd4f47..5e098780cf23 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9391,6 +9391,8 @@ static int ath11k_fw_stats_request(struct ath11k *ar, spin_lock_bh(&ar->data_lock); ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); reinit_completion(&ar->fw_stats_complete); From 3b6d00fa883075dcaf49221538230e038a9c0b43 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:44 +0800 Subject: [PATCH 042/497] wifi: ath11k: don't wait when there is no vdev started For WMI_REQUEST_VDEV_STAT request, firmware might split response into multiple events dut to buffer limit, hence currently in ath11k_debugfs_fw_stats_process() we wait until all events received. In case there is no vdev started, this results in that below condition would never get satisfied ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started) finally the requestor would be blocked until wait time out. The same applies to WMI_REQUEST_BCN_STAT request as well due to: ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs) Change to check the number of started vdev first: if it is zero, finish wait directly; if not, follow the old way. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-4-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/debugfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index 27c93c0b4c22..ccf0e62c7d7a 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -107,7 +107,7 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * { struct ath11k_base *ab = ar->ab; struct ath11k_pdev *pdev; - bool is_end; + bool is_end = true; size_t total_vdevs_started = 0; int i; @@ -132,7 +132,9 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * total_vdevs_started += ar->num_started_vdevs; } - is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); @@ -151,7 +153,9 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); From 72610ed7d79da17ee09102534d6c696a4ea8a08e Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:45 +0800 Subject: [PATCH 043/497] wifi: ath11k: move some firmware stats related functions outside of debugfs Commit b488c766442f ("ath11k: report rssi of each chain to mac80211 for QCA6390/WCN6855") and commit c3b39553fc77 ("ath11k: add signal report to mac80211 for QCA6390 and WCN6855") call debugfs functions in mac ops. Those functions are no-ops if CONFIG_ATH11K_DEBUGFS is not enabled, thus cause wrong status reported. Move them to mac.c. Besides, since WMI_REQUEST_RSSI_PER_CHAIN_STAT and WMI_REQUEST_VDEV_STAT stats could also be requested via mac ops, process them directly in ath11k_update_stats_event(). Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Fixes: b488c766442f ("ath11k: report rssi of each chain to mac80211 for QCA6390/WCN6855") Fixes: c3b39553fc77 ("ath11k: add signal report to mac80211 for QCA6390 and WCN6855") Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-5-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/debugfs.c | 126 ++-------------------- drivers/net/wireless/ath/ath11k/debugfs.h | 10 +- drivers/net/wireless/ath/ath11k/mac.c | 88 ++++++++++++++- drivers/net/wireless/ath/ath11k/mac.h | 4 +- drivers/net/wireless/ath/ath11k/wmi.c | 45 +++++++- 5 files changed, 135 insertions(+), 138 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c index ccf0e62c7d7a..5d46f8e4c231 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.c +++ b/drivers/net/wireless/ath/ath11k/debugfs.c @@ -93,58 +93,14 @@ void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, spin_unlock_bh(&dbr_data->lock); } -static void ath11k_debugfs_fw_stats_reset(struct ath11k *ar) -{ - spin_lock_bh(&ar->data_lock); - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); - ar->fw_stats.num_vdev_recvd = 0; - ar->fw_stats.num_bcn_recvd = 0; - spin_unlock_bh(&ar->data_lock); -} - void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats) { struct ath11k_base *ab = ar->ab; - struct ath11k_pdev *pdev; bool is_end = true; - size_t total_vdevs_started = 0; - int i; - - /* WMI_REQUEST_PDEV_STAT request has been already processed */ - - if (stats->stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { - complete(&ar->fw_stats_done); - return; - } - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath11k_warn(ab, "empty vdev stats"); - return; - } - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += ar->num_started_vdevs; - } - - if (total_vdevs_started) - is_end = ((++ar->fw_stats.num_vdev_recvd) == - total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) - complete(&ar->fw_stats_done); - - return; - } + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_RSSI_PER_CHAIN_STAT and + * WMI_REQUEST_VDEV_STAT requests have been already processed. + */ if (stats->stats_id == WMI_REQUEST_BCN_STAT) { if (list_empty(&stats->bcn)) { ath11k_warn(ab, "empty bcn stats"); @@ -165,76 +121,6 @@ void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats * } } -static int ath11k_debugfs_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - ath11k_debugfs_fw_stats_reset(ar); - - reinit_completion(&ar->fw_stats_complete); - reinit_completion(&ar->fw_stats_done); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) - return -ETIMEDOUT; - - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); - if (!time_left) - return -ETIMEDOUT; - - return 0; -} - -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id) -{ - struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param; - int ret; - - mutex_lock(&ar->conf_mutex); - - if (ar->state != ATH11K_STATE_ON) { - ret = -ENETDOWN; - goto err_unlock; - } - - req_param.pdev_id = pdev_id; - req_param.vdev_id = vdev_id; - req_param.stats_id = stats_id; - - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); - if (ret) - ath11k_warn(ab, "failed to request fw stats: %d\n", ret); - - ath11k_dbg(ab, ATH11K_DBG_WMI, - "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", - pdev_id, vdev_id, stats_id); - -err_unlock: - mutex_unlock(&ar->conf_mutex); - - return ret; -} - static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) { struct ath11k *ar = inode->i_private; @@ -260,7 +146,7 @@ static int ath11k_open_pdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_PDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_free; @@ -331,7 +217,7 @@ static int ath11k_open_vdev_stats(struct inode *inode, struct file *file) req_param.vdev_id = 0; req_param.stats_id = WMI_REQUEST_VDEV_STAT; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw vdev stats: %d\n", ret); goto err_free; @@ -407,7 +293,7 @@ static int ath11k_open_bcn_stats(struct inode *inode, struct file *file) continue; req_param.vdev_id = arvif->vdev_id; - ret = ath11k_debugfs_fw_stats_request(ar, &req_param); + ret = ath11k_mac_fw_stats_request(ar, &req_param); if (ret) { ath11k_warn(ar->ab, "failed to request fw bcn stats: %d\n", ret); goto err_free; diff --git a/drivers/net/wireless/ath/ath11k/debugfs.h b/drivers/net/wireless/ath/ath11k/debugfs.h index a39e458637b0..ed7fec177588 100644 --- a/drivers/net/wireless/ath/ath11k/debugfs.h +++ b/drivers/net/wireless/ath/ath11k/debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2022, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef _ATH11K_DEBUGFS_H_ @@ -273,8 +273,6 @@ void ath11k_debugfs_unregister(struct ath11k *ar); void ath11k_debugfs_fw_stats_process(struct ath11k *ar, struct ath11k_fw_stats *stats); void ath11k_debugfs_fw_stats_init(struct ath11k *ar); -int ath11k_debugfs_get_fw_stats(struct ath11k *ar, u32 pdev_id, - u32 vdev_id, u32 stats_id); static inline bool ath11k_debugfs_is_pktlog_lite_mode_enabled(struct ath11k *ar) { @@ -381,12 +379,6 @@ static inline int ath11k_debugfs_rx_filter(struct ath11k *ar) return 0; } -static inline int ath11k_debugfs_get_fw_stats(struct ath11k *ar, - u32 pdev_id, u32 vdev_id, u32 stats_id) -{ - return 0; -} - static inline void ath11k_debugfs_add_dbring_entry(struct ath11k *ar, enum wmi_direct_buffer_module id, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 5e098780cf23..1550533b5587 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -8997,6 +8997,86 @@ static void ath11k_mac_put_chain_rssi(struct station_info *sinfo, } } +static void ath11k_mac_fw_stats_reset(struct ath11k *ar) +{ + spin_lock_bh(&ar->data_lock); + ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); + ath11k_fw_stats_vdevs_free(&ar->fw_stats.vdevs); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; + spin_unlock_bh(&ar->data_lock); +} + +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param) +{ + struct ath11k_base *ab = ar->ab; + unsigned long time_left; + int ret; + + lockdep_assert_held(&ar->conf_mutex); + + ath11k_mac_fw_stats_reset(ar); + + reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); + + ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); + + if (ret) { + ath11k_warn(ab, "could not request fw stats (%d)\n", + ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); + if (!time_left) + return -ETIMEDOUT; + + /* FW stats can get split when exceeding the stats data buffer limit. + * In that case, since there is no end marking for the back-to-back + * received 'update stats' event, we keep a 3 seconds timeout in case, + * fw_stats_done is not marked yet + */ + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + +static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, + u32 vdev_id, u32 stats_id) +{ + struct ath11k_base *ab = ar->ab; + struct stats_request_params req_param; + int ret; + + mutex_lock(&ar->conf_mutex); + + if (ar->state != ATH11K_STATE_ON) { + ret = -ENETDOWN; + goto err_unlock; + } + + req_param.pdev_id = pdev_id; + req_param.vdev_id = vdev_id; + req_param.stats_id = stats_id; + + ret = ath11k_mac_fw_stats_request(ar, &req_param); + if (ret) + ath11k_warn(ab, "failed to request fw stats: %d\n", ret); + + ath11k_dbg(ab, ATH11K_DBG_WMI, + "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", + pdev_id, vdev_id, stats_id); + +err_unlock: + mutex_unlock(&ar->conf_mutex); + + return ret; +} + static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -9034,8 +9114,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { + !ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_RSSI_PER_CHAIN_STAT)) { ath11k_mac_put_chain_rssi(sinfo, arsta, "fw stats", true); } @@ -9043,8 +9123,8 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, if (!signal && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && - !(ath11k_debugfs_get_fw_stats(ar, ar->pdev->pdev_id, 0, - WMI_REQUEST_VDEV_STAT))) + !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_VDEV_STAT))) signal = arsta->rssi_beacon; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, diff --git a/drivers/net/wireless/ath/ath11k/mac.h b/drivers/net/wireless/ath/ath11k/mac.h index f5800fbecff8..5e61eea1bb03 100644 --- a/drivers/net/wireless/ath/ath11k/mac.h +++ b/drivers/net/wireless/ath/ath11k/mac.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause-Clear */ /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2023, 2025 Qualcomm Innovation Center, Inc. All rights reserved. */ #ifndef ATH11K_MAC_H @@ -179,4 +179,6 @@ int ath11k_mac_vif_set_keepalive(struct ath11k_vif *arvif, void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar, struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *ctx); +int ath11k_mac_fw_stats_request(struct ath11k *ar, + struct stats_request_params *req_param); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 27cb0bb06b93..98811726d33b 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8158,6 +8158,11 @@ static void ath11k_peer_assoc_conf_event(struct ath11k_base *ab, struct sk_buff static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *skb) { struct ath11k_fw_stats stats = {}; + size_t total_vdevs_started = 0; + struct ath11k_pdev *pdev; + bool is_end = true; + int i; + struct ath11k *ar; int ret; @@ -8184,7 +8189,8 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via + /* WMI_REQUEST_PDEV_STAT, WMI_REQUEST_VDEV_STAT and + * WMI_REQUEST_RSSI_PER_CHAIN_STAT can be requested via mac ops or via * debugfs fw stats. Therefore, processing it separately. */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { @@ -8193,9 +8199,40 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk goto complete; } - /* WMI_REQUEST_VDEV_STAT, WMI_REQUEST_BCN_STAT and WMI_REQUEST_RSSI_PER_CHAIN_STAT - * are currently requested only via debugfs fw stats. Hence, processing these - * in debugfs context + if (stats.stats_id == WMI_REQUEST_RSSI_PER_CHAIN_STAT) { + complete(&ar->fw_stats_done); + goto complete; + } + + if (stats.stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats.vdevs)) { + ath11k_warn(ab, "empty vdev stats"); + goto complete; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += ar->num_started_vdevs; + } + + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); + + list_splice_tail_init(&stats.vdevs, + &ar->fw_stats.vdevs); + + if (is_end) + complete(&ar->fw_stats_done); + + goto complete; + } + + /* WMI_REQUEST_BCN_STAT is currently requested only via debugfs fw stats. + * Hence, processing it in debugfs context */ ath11k_debugfs_fw_stats_process(ar, &stats); From 81f64165c9dc2d9b070d8240dd369974ebe188d3 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:46 +0800 Subject: [PATCH 044/497] wifi: ath11k: adjust unlock sequence in ath11k_update_stats_event() Currently RCU lock and ar->data_lock are acquired in a sequence of rcu_read_lock() spin_lock_bh(&ar->data_lock) but released in a sequence of rcu_read_unlock() spin_unlock_bh(&ar->data_lock) Although there are no apparent issues with this, reorder them to achieve symmetry. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Signed-off-by: Baochen Qiang Acked-by: Kalle Valo Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-6-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 98811726d33b..56af2e9634f4 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -8238,8 +8238,8 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk complete: complete(&ar->fw_stats_complete); - rcu_read_unlock(); spin_unlock_bh(&ar->data_lock); + rcu_read_unlock(); /* Since the stats's pdev, vdev and beacon list are spliced and reinitialised * at this point, no need to free the individual list. From c5b92a2c18938ebb08e8d4062408ab1524da31c3 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:47 +0800 Subject: [PATCH 045/497] wifi: ath11k: move locking outside of ath11k_mac_get_fw_stats() Currently ath11k_mac_get_fw_stats() is acquiring/releasing ar->conf_mutex by itself. In order to reuse this function in a context where that lock is already taken, move lock handling to its callers, then the function itself only has to assert it. There is only one caller now, i.e., ath11k_mac_op_sta_statistics(), so add lock handling there. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-7-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 1550533b5587..18e1d78cac45 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9052,12 +9052,10 @@ static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, struct stats_request_params req_param; int ret; - mutex_lock(&ar->conf_mutex); + lockdep_assert_held(&ar->conf_mutex); - if (ar->state != ATH11K_STATE_ON) { - ret = -ENETDOWN; - goto err_unlock; - } + if (ar->state != ATH11K_STATE_ON) + return -ENETDOWN; req_param.pdev_id = pdev_id; req_param.vdev_id = vdev_id; @@ -9071,9 +9069,6 @@ static int ath11k_mac_get_fw_stats(struct ath11k *ar, u32 pdev_id, "debug get fw stat pdev id %d vdev id %d stats id 0x%x\n", pdev_id, vdev_id, stats_id); -err_unlock: - mutex_unlock(&ar->conf_mutex); - return ret; } @@ -9111,6 +9106,7 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, ath11k_mac_put_chain_rssi(sinfo, arsta, "ppdu", false); + mutex_lock(&ar->conf_mutex); if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) && arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA && ar->ab->hw_params.supports_rssi_stats && @@ -9126,6 +9122,7 @@ static void ath11k_mac_op_sta_statistics(struct ieee80211_hw *hw, !(ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, WMI_REQUEST_VDEV_STAT))) signal = arsta->rssi_beacon; + mutex_unlock(&ar->conf_mutex); ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "sta statistics db2dbm %u rssi comb %d rssi beacon %d\n", From 29e2adf2ef2966379bd3fe002530b10dfc3030ba Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 20 Feb 2025 16:24:48 +0800 Subject: [PATCH 046/497] wifi: ath11k: consistently use ath11k_mac_get_fw_stats() Currently to get firmware stats, ath11k_mac_op_get_txpower() calls ath11k_fw_stats_request() and ath11k_mac_op_sta_statistics() calls ath11k_mac_get_fw_stats(). Those two helpers are basically doing the same, except for: 1. ath11k_mac_get_fw_stats() verifies ar->state inside itself. 2. ath11k_mac_get_fw_stats() calls ath11k_mac_fw_stats_request() which then calls ath11k_mac_fw_stats_reset() to free pdev/vdev stats whereas only pdev stats are freed by ath11k_fw_stats_request(). 3. ath11k_mac_get_fw_stats() waits for ar->fw_stats_complete and ar->fw_stats_done, whereas ath11k_fw_stats_request() only waits for ar->fw_stats_complete. Change to call ath11k_mac_get_fw_stats() in ath11k_mac_op_get_txpower(). This is valid because: 1. ath11k_mac_op_get_txpower() also has the same request on ar->state. 2. it is harmless to call ath11k_fw_stats_vdevs_free() since ar->fw_stats.vdevs should be empty and there should be no one expecting it at that time. 3. ath11k_mac_op_get_txpower() only needs pdev stats. For pdev stats, ar->fw_stats_done is set to true whenever ar->fw_stats_complete is set to true in ath11k_update_stats_event(). So additional wait on ar->fw_stats_done does not wast any time. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.37 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250220082448.31039-8-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 44 ++------------------------- 1 file changed, 2 insertions(+), 42 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 18e1d78cac45..13301ca317a5 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9457,40 +9457,6 @@ static int ath11k_mac_op_remain_on_channel(struct ieee80211_hw *hw, return ret; } -static int ath11k_fw_stats_request(struct ath11k *ar, - struct stats_request_params *req_param) -{ - struct ath11k_base *ab = ar->ab; - unsigned long time_left; - int ret; - - lockdep_assert_held(&ar->conf_mutex); - - spin_lock_bh(&ar->data_lock); - ath11k_fw_stats_pdevs_free(&ar->fw_stats.pdevs); - ar->fw_stats.num_vdev_recvd = 0; - ar->fw_stats.num_bcn_recvd = 0; - spin_unlock_bh(&ar->data_lock); - - reinit_completion(&ar->fw_stats_complete); - reinit_completion(&ar->fw_stats_done); - - ret = ath11k_wmi_send_stats_request_cmd(ar, req_param); - if (ret) { - ath11k_warn(ab, "could not request fw stats (%d)\n", - ret); - return ret; - } - - time_left = wait_for_completion_timeout(&ar->fw_stats_complete, - 1 * HZ); - - if (!time_left) - return -ETIMEDOUT; - - return 0; -} - static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, unsigned int link_id, @@ -9498,7 +9464,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, { struct ath11k *ar = hw->priv; struct ath11k_base *ab = ar->ab; - struct stats_request_params req_param = {0}; struct ath11k_fw_stats_pdev *pdev; int ret; @@ -9510,9 +9475,6 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, */ mutex_lock(&ar->conf_mutex); - if (ar->state != ATH11K_STATE_ON) - goto err_fallback; - /* Firmware doesn't provide Tx power during CAC hence no need to fetch * the stats. */ @@ -9521,10 +9483,8 @@ static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, return -EAGAIN; } - req_param.pdev_id = ar->pdev->pdev_id; - req_param.stats_id = WMI_REQUEST_PDEV_STAT; - - ret = ath11k_fw_stats_request(ar, &req_param); + ret = ath11k_mac_get_fw_stats(ar, ar->pdev->pdev_id, 0, + WMI_REQUEST_PDEV_STAT); if (ret) { ath11k_warn(ab, "failed to request fw pdev stats: %d\n", ret); goto err_fallback; From b0d226a60856a1b765bb9a3848c7b2322fd08c47 Mon Sep 17 00:00:00 2001 From: Rodrigo Gobbi Date: Thu, 22 May 2025 17:01:12 -0300 Subject: [PATCH 047/497] wifi: ath11k: validate ath11k_crypto_mode on top of ath11k_core_qmi_firmware_ready if ath11k_crypto_mode is invalid (not ATH11K_CRYPT_MODE_SW/ATH11K_CRYPT_MODE_HW), ath11k_core_qmi_firmware_ready() will not undo some actions that was previously started/configured. Do the validation as soon as possible in order to avoid undoing actions in that case and also to fix the following smatch warning: drivers/net/wireless/ath/ath11k/core.c:2166 ath11k_core_qmi_firmware_ready() warn: missing unwind goto? Signed-off-by: Rodrigo Gobbi Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202304151955.oqAetVFd-lkp@intel.com/ Fixes: aa2092a9bab3 ("ath11k: add raw mode and software crypto support") Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250522200519.16858-1-rodrigo.gobbi.7@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index c8c5008be7f2..22a101136135 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -2135,6 +2135,20 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) { int ret; + switch (ath11k_crypto_mode) { + case ATH11K_CRYPT_MODE_SW: + set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + case ATH11K_CRYPT_MODE_HW: + clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); + clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); + break; + default: + ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); + return -EINVAL; + } + ret = ath11k_core_start_firmware(ab, ab->fw_mode); if (ret) { ath11k_err(ab, "failed to start firmware: %d\n", ret); @@ -2153,20 +2167,6 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab) goto err_firmware_stop; } - switch (ath11k_crypto_mode) { - case ATH11K_CRYPT_MODE_SW: - set_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - case ATH11K_CRYPT_MODE_HW: - clear_bit(ATH11K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags); - clear_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); - break; - default: - ath11k_info(ab, "invalid crypto_mode: %d\n", ath11k_crypto_mode); - return -EINVAL; - } - if (ath11k_frame_mode == ATH11K_HW_TXRX_RAW) set_bit(ATH11K_FLAG_RAW_MODE, &ab->dev_flags); From 7588a893cde5385ad308400ff167d29a29913b3a Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 23 May 2025 10:23:05 +0800 Subject: [PATCH 048/497] wifi: ath12k: fix GCC_GCC_PCIE_HOT_RST definition for WCN7850 GCC_GCC_PCIE_HOT_RST is wrongly defined for WCN7850, causing kernel crash on some specific platforms. Since this register is divergent for WCN7850 and QCN9274, move it to register table to allow different definitions. Then correct the register address for WCN7850 to fix this issue. Note IPQ5332 is not affected as it is not PCIe based device. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Reported-by: Parth Pancholi Closes: https://lore.kernel.org/all/86899b2235a59c9134603beebe08f2bb0b244ea0.camel@gmail.com Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Tested-by: Parth Pancholi Link: https://patch.msgid.link/20250523-ath12k-wrong-global-reset-addr-v1-1-3b06eb556196@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hw.c | 6 ++++++ drivers/net/wireless/ath/ath12k/hw.h | 2 ++ drivers/net/wireless/ath/ath12k/pci.c | 6 +++--- drivers/net/wireless/ath/ath12k/pci.h | 4 +++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 7e2cf0fb2085..8254dc10b53b 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -951,6 +951,8 @@ static const struct ath12k_hw_regs qcn9274_v1_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs qcn9274_v2_regs = { @@ -1042,6 +1044,8 @@ static const struct ath12k_hw_regs qcn9274_v2_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e38338, }; static const struct ath12k_hw_regs ipq5332_regs = { @@ -1215,6 +1219,8 @@ static const struct ath12k_hw_regs wcn7850_regs = { .hal_umac_ce0_dest_reg_base = 0x01b81000, .hal_umac_ce1_src_reg_base = 0x01b82000, .hal_umac_ce1_dest_reg_base = 0x01b83000, + + .gcc_gcc_pcie_hot_rst = 0x1e40304, }; static const struct ath12k_hw_hal_params ath12k_hw_hal_params_qcn9274 = { diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index 0fbc17649df4..0a75bc5abfa2 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -375,6 +375,8 @@ struct ath12k_hw_regs { u32 hal_reo_cmd_ring_base; u32 hal_reo_status_ring_base; + + u32 gcc_gcc_pcie_hot_rst; }; static inline const char *ath12k_bd_ie_type_str(enum ath12k_bd_ie_type type) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 489d546390fc..1f3cfd9b89fd 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -292,10 +292,10 @@ static void ath12k_pci_enable_ltssm(struct ath12k_base *ab) ath12k_dbg(ab, ATH12K_DBG_PCI, "pci ltssm 0x%x\n", val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); val |= GCC_GCC_PCIE_HOT_RST_VAL; - ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST, val); - val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST); + ath12k_pci_write32(ab, GCC_GCC_PCIE_HOT_RST(ab), val); + val = ath12k_pci_read32(ab, GCC_GCC_PCIE_HOT_RST(ab)); ath12k_dbg(ab, ATH12K_DBG_PCI, "pci pcie_hot_rst 0x%x\n", val); diff --git a/drivers/net/wireless/ath/ath12k/pci.h b/drivers/net/wireless/ath/ath12k/pci.h index 0b4c459d6d8e..d1ec8aad7f6c 100644 --- a/drivers/net/wireless/ath/ath12k/pci.h +++ b/drivers/net/wireless/ath/ath12k/pci.h @@ -28,7 +28,9 @@ #define PCIE_PCIE_PARF_LTSSM 0x1e081b0 #define PARM_LTSSM_VALUE 0x111 -#define GCC_GCC_PCIE_HOT_RST 0x1e38338 +#define GCC_GCC_PCIE_HOT_RST(ab) \ + ((ab)->hw_params->regs->gcc_gcc_pcie_hot_rst) + #define GCC_GCC_PCIE_HOT_RST_VAL 0x10 #define PCIE_PCIE_INT_ALL_CLEAR 0x1e08228 From b6bca6d7149e0bc1a56e831af0296d45688945ec Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 5 Jun 2025 10:27:22 -0700 Subject: [PATCH 049/497] wifi: ath12k: Fix hal_reo_cmd_status kernel-doc Currently a warning is reported when running: % scripts/kernel-doc -Wall -Werror -none drivers/net/wireless/ath/ath12k/hal.h Warning: drivers/net/wireless/ath/ath12k/hal.h:596 Enum value 'HAL_REO_CMD_RESOURCE_BLOCKED' not described in enum 'hal_reo_cmd_status' Add the missing description of HAL_REO_CMD_RESOURCE_BLOCKED. Link: https://patch.msgid.link/20250605-hal_reo_cmd_status-kdoc-v1-1-e59f4b814b88@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 0ee9c6b26dab..c1750b5dc03c 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -585,7 +585,8 @@ enum hal_reo_cmd_type { * or cache was blocked * @HAL_REO_CMD_FAILED: Command execution failed, could be due to * invalid queue desc - * @HAL_REO_CMD_RESOURCE_BLOCKED: + * @HAL_REO_CMD_RESOURCE_BLOCKED: Command could not be executed because + * one or more descriptors were blocked * @HAL_REO_CMD_DRAIN: */ enum hal_reo_cmd_status { From f3fe49dbddd73f0155a8935af47cb63693069dbe Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Wed, 4 Jun 2025 13:52:50 +0800 Subject: [PATCH 050/497] wifi: ath12k: fix uaf in ath12k_core_init() When the execution of ath12k_core_hw_group_assign() or ath12k_core_hw_group_create() fails, the registered notifier chain is not unregistered properly. Its memory is freed after rmmod, which may trigger to a use-after-free (UAF) issue if there is a subsequent access to this notifier chain. Fixes the issue by calling ath12k_core_panic_notifier_unregister() in failure cases. Call trace: notifier_chain_register+0x4c/0x1f0 (P) atomic_notifier_chain_register+0x38/0x68 ath12k_core_init+0x50/0x4e8 [ath12k] ath12k_pci_probe+0x5f8/0xc28 [ath12k] pci_device_probe+0xbc/0x1a8 really_probe+0xc8/0x3a0 __driver_probe_device+0x84/0x1b0 driver_probe_device+0x44/0x130 __driver_attach+0xcc/0x208 bus_for_each_dev+0x84/0x100 driver_attach+0x2c/0x40 bus_add_driver+0x130/0x260 driver_register+0x70/0x138 __pci_register_driver+0x68/0x80 ath12k_pci_init+0x30/0x68 [ath12k] ath12k_init+0x28/0x78 [ath12k] Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Fixes: 6f245ea0ec6c ("wifi: ath12k: introduce device group abstraction") Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250604055250.1228501-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 31d851d8e688..ebc0560d40e3 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -2129,7 +2129,8 @@ int ath12k_core_init(struct ath12k_base *ab) if (!ag) { mutex_unlock(&ath12k_hw_group_mutex); ath12k_warn(ab, "unable to get hw group\n"); - return -ENODEV; + ret = -ENODEV; + goto err_unregister_notifier; } mutex_unlock(&ath12k_hw_group_mutex); @@ -2144,7 +2145,7 @@ int ath12k_core_init(struct ath12k_base *ab) if (ret) { mutex_unlock(&ag->mutex); ath12k_warn(ab, "unable to create hw group\n"); - goto err; + goto err_destroy_hw_group; } } @@ -2152,9 +2153,12 @@ int ath12k_core_init(struct ath12k_base *ab) return 0; -err: +err_destroy_hw_group: ath12k_core_hw_group_destroy(ab->ag); ath12k_core_hw_group_unassign(ab); +err_unregister_notifier: + ath12k_core_panic_notifier_unregister(ab); + return ret; } From 06118ae36855b7d3d22688298e74a766ccf0cb7a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 27 May 2025 08:44:14 +0300 Subject: [PATCH 051/497] regulator: max20086: Fix refcount leak in max20086_parse_regulators_dt() There is a missing call to of_node_put() if devm_kcalloc() fails. Fix this by changing the code to use cleanup.h magic to drop the refcount. Fixes: 6b0cd72757c6 ("regulator: max20086: fix invalid memory access") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aDVRLqgJWMxYU03G@stanley.mountain Reviewed-by: Laurent Pinchart Signed-off-by: Mark Brown --- drivers/regulator/max20086-regulator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index b4fe76e33ff2..fcdd2d0317a5 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -5,6 +5,7 @@ // Copyright (C) 2022 Laurent Pinchart // Copyright (C) 2018 Avnet, Inc. +#include #include #include #include @@ -133,11 +134,11 @@ static int max20086_regulators_register(struct max20086 *chip) static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) { struct of_regulator_match *matches; - struct device_node *node; unsigned int i; int ret; - node = of_get_child_by_name(chip->dev->of_node, "regulators"); + struct device_node *node __free(device_node) = + of_get_child_by_name(chip->dev->of_node, "regulators"); if (!node) { dev_err(chip->dev, "regulators node not found\n"); return -ENODEV; @@ -153,7 +154,6 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) ret = of_regulator_match(chip->dev, node, matches, chip->info->num_outputs); - of_node_put(node); if (ret < 0) { dev_err(chip->dev, "Failed to match regulators\n"); return -EINVAL; From a5bf5272295d3f058adeee025d2a0b6625f8ba7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Pi=C3=A9dallu?= Date: Fri, 6 Jun 2025 15:37:24 +0200 Subject: [PATCH 052/497] spi: omap2-mcspi: Disable multi mode when CS should be kept asserted after message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the last transfer of a SPI message has the cs_change flag, the CS is kept asserted after the message. Multi-mode can't respect this as CS is deasserted by the hardware at the end of the message. Disable multi-mode when not applicable to the current message. Fixes: d153ff4056cb ("spi: omap2-mcspi: Add support for MULTI-mode") Signed-off-by: Félix Piédallu Link: https://patch.msgid.link/20250606-cs_change_fix-v1-1-27191a98a2e5@non.se.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap2-mcspi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 29c616e2c408..e834fb42fd4b 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -1287,9 +1287,15 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, mcspi->use_multi_mode = false; } - /* Check if transfer asks to change the CS status after the transfer */ - if (!tr->cs_change) - mcspi->use_multi_mode = false; + if (list_is_last(&tr->transfer_list, &msg->transfers)) { + /* Check if transfer asks to keep the CS status after the whole message */ + if (tr->cs_change) + mcspi->use_multi_mode = false; + } else { + /* Check if transfer asks to change the CS status after the transfer */ + if (!tr->cs_change) + mcspi->use_multi_mode = false; + } /* * If at least one message is not compatible, switch back to single mode From 10c24e0d2f7cd2bc8a847cf750f01301ce67dbc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Pi=C3=A9dallu?= Date: Fri, 6 Jun 2025 15:37:25 +0200 Subject: [PATCH 053/497] spi: omap2-mcspi: Disable multi-mode when the previous message kept CS asserted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the last transfer of a SPI message has the cs_change flag, the CS is kept asserted after the message. The next message can't use multi-mode because the CS will be briefly deasserted before the first transfer. Remove the early exit of the list_for_each_entry because the last transfer actually needs to be always checked. Fixes: d153ff4056cb ("spi: omap2-mcspi: Add support for MULTI-mode") Signed-off-by: Félix Piédallu Link: https://patch.msgid.link/20250606-cs_change_fix-v1-2-27191a98a2e5@non.se.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap2-mcspi.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index e834fb42fd4b..70bb74b3bd9c 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -134,6 +134,7 @@ struct omap2_mcspi { size_t max_xfer_len; u32 ref_clk_hz; bool use_multi_mode; + bool last_msg_kept_cs; }; struct omap2_mcspi_cs { @@ -1269,6 +1270,10 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, * multi-mode is applicable. */ mcspi->use_multi_mode = true; + + if (mcspi->last_msg_kept_cs) + mcspi->use_multi_mode = false; + list_for_each_entry(tr, &msg->transfers, transfer_list) { if (!tr->bits_per_word) bits_per_word = msg->spi->bits_per_word; @@ -1289,22 +1294,17 @@ static int omap2_mcspi_prepare_message(struct spi_controller *ctlr, if (list_is_last(&tr->transfer_list, &msg->transfers)) { /* Check if transfer asks to keep the CS status after the whole message */ - if (tr->cs_change) + if (tr->cs_change) { mcspi->use_multi_mode = false; + mcspi->last_msg_kept_cs = true; + } else { + mcspi->last_msg_kept_cs = false; + } } else { /* Check if transfer asks to change the CS status after the transfer */ if (!tr->cs_change) mcspi->use_multi_mode = false; } - - /* - * If at least one message is not compatible, switch back to single mode - * - * The bits_per_word of certain transfer can be different, but it will have no - * impact on the signal itself. - */ - if (!mcspi->use_multi_mode) - break; } omap2_mcspi_set_mode(ctlr); From 1dd630088332b5b310f3dd7eaacae9f3c4465651 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 8 Jun 2025 22:29:39 +0800 Subject: [PATCH 054/497] spi: loongson: Fix build warnings about export.h After commit a934a57a42f64a4 ("scripts/misc-check: check missing #include when W=1") and 7d95680d64ac8e836c ("scripts/misc-check: check unnecessary #include when W=1"), we get some build warnings with W=1: drivers/spi/spi-loongson-core.c: warning: EXPORT_SYMBOL() is used, but #include is missing So fix these build warnings for SPI/Loongson. Signed-off-by: Huacai Chen Link: https://patch.msgid.link/20250608142939.172108-1-chenhuacai@loongson.cn Signed-off-by: Mark Brown --- drivers/spi/spi-loongson-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-loongson-core.c b/drivers/spi/spi-loongson-core.c index 4fec226456d1..b46f072a0387 100644 --- a/drivers/spi/spi-loongson-core.c +++ b/drivers/spi/spi-loongson-core.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include From 2b74aea6d078ade810a3b0f7d1bcfcba2eaad416 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Jun 2025 12:04:14 +0300 Subject: [PATCH 055/497] spi: spi-pci1xxxx: Fix error code in probe Return the error code if pci_alloc_irq_vectors() fails. Don't return success. Fixes: b4608e944177 ("spi: spi-pci1xxxx: Fix Probe failure with Dual SPI instance with INTx interrupts") Signed-off-by: Dan Carpenter Reviewed-by: Thangaraj Samynathan Link: https://patch.msgid.link/aEKvDrUxD19GWi0u@stanley.mountain Signed-off-by: Mark Brown --- drivers/spi/spi-pci1xxxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index c6489e90b8b9..9112d8a1a0c8 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -765,7 +765,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * PCI_IRQ_ALL_TYPES); if (num_vector < 0) { dev_err(&pdev->dev, "Error allocating MSI vectors\n"); - return ret; + return num_vector; } init_completion(&spi_sub_ptr->spi_xfer_done); From 83c4c67076c209787515e06fffd41dd0bdab09b9 Mon Sep 17 00:00:00 2001 From: "James A. MacInnes" Date: Wed, 12 Feb 2025 15:03:46 -0800 Subject: [PATCH 056/497] drm/msm/dp: Disable wide bus support for SDM845 When widebus was enabled for DisplayPort in commit c7c412202623 ("drm/msm/dp: enable widebus on all relevant chipsets") it was clarified that it is only supported on DPU 5.0.0 onwards which includes SC7180 on DPU revision 6.2. However, this patch missed that the description structure for SC7180 is also reused for SDM845 (because of identical io_start address) which is only DPU 4.0.0, leading to a wrongly enbled widebus feature and corruption on that platform. Create a separate msm_dp_desc_sdm845 structure for this SoC compatible, with the wide_bus_supported flag turned off. Fixes: c7c412202623 ("drm/msm/dp: enable widebus on all relevant chipsets") Signed-off-by: James A. MacInnes [DB: reworded commit text following Marijn's suggestion] Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/636944/ Link: https://lore.kernel.org/r/20250212-sdm845_dp-v2-1-4954e51458f4@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dp/dp_display.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 386c4669c831..a48e6db4f156 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -128,6 +128,11 @@ static const struct msm_dp_desc msm_dp_desc_sa8775p[] = { {} }; +static const struct msm_dp_desc msm_dp_desc_sdm845[] = { + { .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0 }, + {} +}; + static const struct msm_dp_desc msm_dp_desc_sc7180[] = { { .io_start = 0x0ae90000, .id = MSM_DP_CONTROLLER_0, .wide_bus_supported = true }, {} @@ -180,7 +185,7 @@ static const struct of_device_id msm_dp_dt_match[] = { { .compatible = "qcom,sc8180x-edp", .data = &msm_dp_desc_sc8180x }, { .compatible = "qcom,sc8280xp-dp", .data = &msm_dp_desc_sc8280xp }, { .compatible = "qcom,sc8280xp-edp", .data = &msm_dp_desc_sc8280xp }, - { .compatible = "qcom,sdm845-dp", .data = &msm_dp_desc_sc7180 }, + { .compatible = "qcom,sdm845-dp", .data = &msm_dp_desc_sdm845 }, { .compatible = "qcom,sm8350-dp", .data = &msm_dp_desc_sc7180 }, { .compatible = "qcom,sm8650-dp", .data = &msm_dp_desc_sm8650 }, { .compatible = "qcom,x1e80100-dp", .data = &msm_dp_desc_x1e80100 }, From 146e87f3e11de0dfa091ff87e34b4bc6eec761a4 Mon Sep 17 00:00:00 2001 From: "James A. MacInnes" Date: Wed, 12 Feb 2025 15:03:47 -0800 Subject: [PATCH 057/497] drm/msm/disp: Correct porch timing for SDM845 Type-C DisplayPort inoperable due to incorrect porch settings. - Re-used wide_bus_en as flag to prevent porch shifting Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: James A. MacInnes Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/636945/ Link: https://lore.kernel.org/r/20250212-sdm845_dp-v2-2-4954e51458f4@gmail.com Signed-off-by: Dmitry Baryshkov --- .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index 8a618841e3ea..1c468ca5d692 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -94,17 +94,21 @@ static void drm_mode_to_intf_timing_params( timing->vsync_polarity = 0; } - /* for DP/EDP, Shift timings to align it to bottom right */ - if (phys_enc->hw_intf->cap->type == INTF_DP) { + timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); + timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); + + /* + * For DP/EDP, Shift timings to align it to bottom right. + * wide_bus_en is set for everything excluding SDM845 & + * porch changes cause DisplayPort failure and HDMI tearing. + */ + if (phys_enc->hw_intf->cap->type == INTF_DP && timing->wide_bus_en) { timing->h_back_porch += timing->h_front_porch; timing->h_front_porch = 0; timing->v_back_porch += timing->v_front_porch; timing->v_front_porch = 0; } - timing->wide_bus_en = dpu_encoder_is_widebus_enabled(phys_enc->parent); - timing->compression_en = dpu_encoder_is_dsc_enabled(phys_enc->parent); - /* * for DP, divide the horizonal parameters by 2 when * widebus is enabled From 8a48e35becb214743214f5504e726c3ec131cd6d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 20 May 2025 13:13:26 +0200 Subject: [PATCH 058/497] drm/msm/dsi/dsi_phy_10nm: Fix missing initial VCO rate Driver unconditionally saves current state on first init in dsi_pll_10nm_init(), but does not save the VCO rate, only some of the divider registers. The state is then restored during probe/enable via msm_dsi_phy_enable() -> msm_dsi_phy_pll_restore_state() -> dsi_10nm_pll_restore_state(). Restoring calls dsi_pll_10nm_vco_set_rate() with pll_10nm->vco_current_rate=0, which basically overwrites existing rate of VCO and messes with clock hierarchy, by setting frequency to 0 to clock tree. This makes anyway little sense - VCO rate was not saved, so should not be restored. If PLL was not configured configure it to minimum rate to avoid glitches and configuring entire in clock hierarchy to 0 Hz. Suggested-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/sz4kbwy5nwsebgf64ia7uq4ee7wbsa5uy3xmlqwcstsbntzcov@ew3dcyjdzmi2/ Signed-off-by: Krzysztof Kozlowski Fixes: a4ccc37693a2 ("drm/msm/dsi_pll_10nm: restore VCO rate during Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/654796/ Link: https://lore.kernel.org/r/20250520111325.92352-2-krzysztof.kozlowski@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index 9812b4d69197..af2e30f3f842 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -704,6 +704,13 @@ static int dsi_pll_10nm_init(struct msm_dsi_phy *phy) /* TODO: Remove this when we have proper display handover support */ msm_dsi_phy_pll_save_state(phy); + /* + * Store also proper vco_current_rate, because its value will be used in + * dsi_10nm_pll_restore_state(). + */ + if (!dsi_pll_10nm_vco_recalc_rate(&pll_10nm->clk_hw, VCO_REF_CLK_RATE)) + pll_10nm->vco_current_rate = pll_10nm->phy->cfg->min_pll_rate; + return 0; } From 5808c34216954cd832bd4b8bc52dfa287049122b Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Mon, 26 May 2025 04:18:07 +0800 Subject: [PATCH 059/497] platform/x86: ideapad-laptop: use usleep_range() for EC polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was reported that ideapad-laptop sometimes causes some recent (since 2024) Lenovo ThinkBook models shut down when: - suspending/resuming - closing/opening the lid - (dis)connecting a charger - reading/writing some sysfs properties, e.g., fan_mode, touchpad - pressing down some Fn keys, e.g., Brightness Up/Down (Fn+F5/F6) - (seldom) loading the kmod The issue has existed since the launch day of such models, and there have been some out-of-tree workarounds (see Link:) for the issue. One disables some functionalities, while another one simply shortens IDEAPAD_EC_TIMEOUT. The disabled functionalities have read_ec_data() in their call chains, which calls schedule() between each poll. It turns out that these models suffer from the indeterminacy of schedule() because of their low tolerance for being polled too frequently. Sometimes schedule() returns too soon due to the lack of ready tasks, causing the margin between two polls to be too short. In this case, the command is somehow aborted, and too many subsequent polls (they poll for "nothing!") may eventually break the state machine in the EC, resulting in a hard shutdown. This explains why shortening IDEAPAD_EC_TIMEOUT works around the issue - it reduces the total number of polls sent to the EC. Even when it doesn't lead to a shutdown, frequent polls may also disturb the ongoing operation and notably delay (+ 10-20ms) the availability of EC response. This phenomenon is unlikely to be exclusive to the models mentioned above, so dropping the schedule() manner should also slightly improve the responsiveness of various models. Fix these issues by migrating to usleep_range(150, 300). The interval is chosen to add some margin to the minimal 50us and considering EC responses are usually available after 150-2500us based on my test. It should be enough to fix these issues on all models subject to the EC bug without introducing latency on other models. Tested on ThinkBook 14 G7+ ASP and solved both issues. No regression was introduced in the test on a model without the EC bug (ThinkBook X IMH, thanks Eric). Link: https://github.com/ty2/ideapad-laptop-tb2024g6plus/commit/6c5db18c9e8109873c2c90a7d2d7f552148f7ad4 Link: https://github.com/ferstar/ideapad-laptop-tb/commit/42d1e68e5009529d31bd23f978f636f79c023e80 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218771 Fixes: 6a09f21dd1e2 ("ideapad: add ACPI helpers") Cc: stable@vger.kernel.org Tested-by: Felix Yan Tested-by: Eric Long Tested-by: Jianfei Zhang Tested-by: Mingcong Bai Tested-by: Minh Le Tested-by: Sicheng Zhu Signed-off-by: Rong Zhang Link: https://lore.kernel.org/r/20250525201833.37939-1-i@rong.moe Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/ideapad-laptop.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index ede483573fe0..b5e4da6a6779 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct ideapad_private *priv) */ #define IDEAPAD_EC_TIMEOUT 200 /* in ms */ +/* + * Some models (e.g., ThinkBook since 2024) have a low tolerance for being + * polled too frequently. Doing so may break the state machine in the EC, + * resulting in a hard shutdown. + * + * It is also observed that frequent polls may disturb the ongoing operation + * and notably delay the availability of EC response. + * + * These values are used as the delay before the first poll and the interval + * between subsequent polls to solve the above issues. + */ +#define IDEAPAD_EC_POLL_MIN_US 150 +#define IDEAPAD_EC_POLL_MAX_US 300 + static int eval_int(acpi_handle handle, const char *name, unsigned long *res) { unsigned long long result; @@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle handle, unsigned long cmd, unsigned long *da end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; while (time_before(jiffies, end_jiffies)) { - schedule(); + usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); err = eval_vpcr(handle, 1, &val); if (err) @@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle handle, unsigned long cmd, unsigned long dat end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; while (time_before(jiffies, end_jiffies)) { - schedule(); + usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); err = eval_vpcr(handle, 1, &val); if (err) From 685f88c72a0c4d12d3bd2ff50286938f14486f85 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 6 Jun 2025 13:53:00 -0700 Subject: [PATCH 060/497] platform/x86/intel-uncore-freq: Fail module load when plat_info is NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address a Smatch static checker warning regarding an unchecked dereference in the function call: set_cdie_id(i, cluster_info, plat_info) when plat_info is NULL. Instead of addressing this one case, in general if plat_info is NULL then it can cause other issues. For example in a two package system it will give warning for duplicate sysfs entry as package ID will be always zero for both packages when creating string for attribute group name. plat_info is derived from TPMI ID TPMI_BUS_INFO, which is integral to the core TPMI design. Therefore, it should not be NULL on a production platform. Consequently, the module should fail to load if plat_info is NULL. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/platform-driver-x86/aEKvGCLd1qmX04Tc@stanley.mountain/T/#u Fixes: 8a54e2253e4c ("platform/x86/intel-uncore-freq: Uncore frequency control via TPMI") Signed-off-by: Srinivas Pandruvada Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250606205300.2384494-1-srinivas.pandruvada@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 1c7b2f2716ca..44d9948ed224 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -511,10 +511,13 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ /* Get the package ID from the TPMI core */ plat_info = tpmi_get_platform_data(auxdev); - if (plat_info) - pkg = plat_info->package_id; - else + if (unlikely(!plat_info)) { dev_info(&auxdev->dev, "Platform information is NULL\n"); + ret = -ENODEV; + goto err_rem_common; + } + + pkg = plat_info->package_id; for (i = 0; i < num_resources; ++i) { struct tpmi_uncore_power_domain_info *pd_info; From afbdc4bbb3a6418778cf969b0795bbaa8237cdd3 Mon Sep 17 00:00:00 2001 From: Joshua Grisham Date: Fri, 6 Jun 2025 15:09:08 +0200 Subject: [PATCH 061/497] platform/x86: samsung-galaxybook: Add SAM0426 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add device ID SAM0426 (Notebook 9 Pro and similar devices) as reported and tested by GitHub user "diego-karsa" [1]. [1]: https://github.com/joshuagrisham/samsung-galaxybook-extras/issues/69 Signed-off-by: Joshua Grisham Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/20250606130909.207047-1-josh@joshuagrisham.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/samsung-galaxybook.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/samsung-galaxybook.c b/drivers/platform/x86/samsung-galaxybook.c index 5878a351993e..3c13e13d4885 100644 --- a/drivers/platform/x86/samsung-galaxybook.c +++ b/drivers/platform/x86/samsung-galaxybook.c @@ -1403,6 +1403,7 @@ static int galaxybook_probe(struct platform_device *pdev) } static const struct acpi_device_id galaxybook_device_ids[] = { + { "SAM0426" }, { "SAM0427" }, { "SAM0428" }, { "SAM0429" }, From 1d0a61940e22e165e6acc4a9c6fb26edbe69112e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Jun 2025 12:04:32 +0300 Subject: [PATCH 062/497] platform/x86/intel: power-domains: Fix error code in tpmi_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return -ENOMEM instead of success if kcalloc() fails. Fixes: e37be5d85c60 ("platform/x86/intel: power-domains: Add interface to get Linux die ID") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/aEKvIGCt6d8Gcx4S@stanley.mountain Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/tpmi_power_domains.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/tpmi_power_domains.c b/drivers/platform/x86/intel/tpmi_power_domains.c index 0c5c88eb7baf..9d8247bb9cfa 100644 --- a/drivers/platform/x86/intel/tpmi_power_domains.c +++ b/drivers/platform/x86/intel/tpmi_power_domains.c @@ -228,8 +228,10 @@ static int __init tpmi_init(void) domain_die_map = kcalloc(size_mul(topology_max_packages(), MAX_POWER_DOMAINS), sizeof(*domain_die_map), GFP_KERNEL); - if (!domain_die_map) + if (!domain_die_map) { + ret = -ENOMEM; goto free_domain_mask; + } ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "platform/x86/tpmi_power_domains:online", From 4dbd11796f3a8eb95647507befc41995458a4023 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 3 Jun 2025 08:24:08 -0500 Subject: [PATCH 063/497] platform/x86/amd: pmc: Clear metrics table at start of cycle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The area of memory that contains the metrics table may contain garbage when the cycle starts. This normally doesn't matter because the cycle itself will populate it with valid data, however commit 9f5595d5f03fd ("platform/x86/amd: pmc: Require at least 2.5 seconds between HW sleep cycles") started to use it during the check() phase. Depending upon what garbage is in the table it's possible that the system will wait 2.5 seconds for even the first cycle, which will be visible to a user. To prevent this from happening explicitly clear the table when logging is started. Fixes: 9f5595d5f03fd ("platform/x86/amd: pmc: Require at least 2.5 seconds between HW sleep cycles") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250603132412.3555302-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index 37c7a57afee5..0b9b23eb7c2c 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -157,6 +157,8 @@ static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev) return -ENOMEM; } + memset_io(dev->smu_virt_addr, 0, sizeof(struct smu_metrics)); + /* Start the logging */ amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_RESET, false); amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, false); From f8afb12a2d7503de6558c23cacd7acbf6e9fe678 Mon Sep 17 00:00:00 2001 From: Jake Hillion Date: Thu, 5 Jun 2025 19:09:26 +0100 Subject: [PATCH 064/497] x86/platform/amd: move final timeout check to after final sleep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __hsmp_send_message sleeps between result read attempts and has a timeout of 100ms. Under extreme load it's possible for these sleeps to take a long time, exceeding the 100ms. In this case the current code does not check the register and fails with ETIMEDOUT. Refactor the loop to ensure there is at least one read of the register after a sleep of any duration. This removes instances of ETIMEDOUT with a single caller, even with a misbehaving scheduler. Tested on AMD Bergamo machines. Suggested-by: Blaise Sanouillet Reviewed-by: Suma Hegde Tested-by: Suma Hegde Signed-off-by: Jake Hillion Link: https://lore.kernel.org/r/20250605-amd-hsmp-v2-1-a811bc3dd74a@hillion.co.uk Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hsmp/hsmp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index 538b36b97095..fa8bd5839dd2 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -97,7 +97,7 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms short_sleep = jiffies + msecs_to_jiffies(HSMP_SHORT_SLEEP); timeout = jiffies + msecs_to_jiffies(HSMP_MSG_TIMEOUT); - while (time_before(jiffies, timeout)) { + while (true) { ret = sock->amd_hsmp_rdwr(sock, mbinfo->msg_resp_off, &mbox_status, HSMP_RD); if (ret) { dev_err(sock->dev, "Error %d reading mailbox status\n", ret); @@ -106,6 +106,10 @@ static int __hsmp_send_message(struct hsmp_socket *sock, struct hsmp_message *ms if (mbox_status != HSMP_STATUS_NOT_READY) break; + + if (!time_before(jiffies, timeout)) + break; + if (time_before(jiffies, short_sleep)) usleep_range(50, 100); else From 784e48a82976ee0b645788750343cd1b28a372f3 Mon Sep 17 00:00:00 2001 From: Jake Hillion Date: Thu, 5 Jun 2025 19:09:27 +0100 Subject: [PATCH 065/497] x86/platform/amd: replace down_timeout() with down_interruptible() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently hsmp_send_message() uses down_timeout() with a 100ms timeout to take the semaphore. However __hsmp_send_message(), the content of the critical section, has a sleep in it. On systems with significantly delayed scheduling behaviour this may take over 100ms. Convert this method to down_interruptible(). Leave the error handling the same as the documentation currently is not specific about what error is returned. Previous behaviour: a caller who competes with another caller stuck in the critical section due to scheduler delays would receive -ETIME. New behaviour: a caller who competes with another caller stuck in the critical section due to scheduler delays will complete successfully. Reviewed-by: Suma Hegde Tested-by: Suma Hegde Signed-off-by: Jake Hillion Link: https://lore.kernel.org/r/20250605-amd-hsmp-v2-2-a811bc3dd74a@hillion.co.uk Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/hsmp/hsmp.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index fa8bd5839dd2..885e2f8136fd 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -214,13 +214,7 @@ int hsmp_send_message(struct hsmp_message *msg) return -ENODEV; sock = &hsmp_pdev.sock[msg->sock_ind]; - /* - * The time taken by smu operation to complete is between - * 10us to 1ms. Sometime it may take more time. - * In SMP system timeout of 100 millisecs should - * be enough for the previous thread to finish the operation - */ - ret = down_timeout(&sock->hsmp_sem, msecs_to_jiffies(HSMP_MSG_TIMEOUT)); + ret = down_interruptible(&sock->hsmp_sem); if (ret < 0) return ret; From 56ec63a6e107e724619e61c7e605b49d365dfa07 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 13 May 2025 21:38:59 +0300 Subject: [PATCH 066/497] pinctrl: qcom: switch to devm_gpiochip_add_data() In order to simplify cleanup actions, use devres-enabled version of gpiochip_add_data(). As the msm_pinctrl_remove() function is now empty, drop it and all its calls from the corresponding pinctrl drivers. Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/20250513-pinctrl-msm-fix-v2-3-249999af0fc1@oss.qualcomm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-apq8064.c | 1 - drivers/pinctrl/qcom/pinctrl-apq8084.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq4019.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq5018.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq5332.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq5424.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq6018.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq8064.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq8074.c | 1 - drivers/pinctrl/qcom/pinctrl-ipq9574.c | 1 - drivers/pinctrl/qcom/pinctrl-mdm9607.c | 1 - drivers/pinctrl/qcom/pinctrl-mdm9615.c | 1 - drivers/pinctrl/qcom/pinctrl-msm.c | 11 +---------- drivers/pinctrl/qcom/pinctrl-msm.h | 1 - drivers/pinctrl/qcom/pinctrl-msm8226.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8660.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8909.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8916.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8917.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8953.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8960.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8976.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8994.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8996.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8998.c | 1 - drivers/pinctrl/qcom/pinctrl-msm8x74.c | 1 - drivers/pinctrl/qcom/pinctrl-qcm2290.c | 1 - drivers/pinctrl/qcom/pinctrl-qcs404.c | 1 - drivers/pinctrl/qcom/pinctrl-qcs615.c | 1 - drivers/pinctrl/qcom/pinctrl-qcs8300.c | 1 - drivers/pinctrl/qcom/pinctrl-qdf2xxx.c | 1 - drivers/pinctrl/qcom/pinctrl-qdu1000.c | 1 - drivers/pinctrl/qcom/pinctrl-sa8775p.c | 1 - drivers/pinctrl/qcom/pinctrl-sar2130p.c | 1 - drivers/pinctrl/qcom/pinctrl-sc7180.c | 1 - drivers/pinctrl/qcom/pinctrl-sc7280.c | 1 - drivers/pinctrl/qcom/pinctrl-sc8180x.c | 1 - drivers/pinctrl/qcom/pinctrl-sc8280xp.c | 1 - drivers/pinctrl/qcom/pinctrl-sdm660.c | 1 - drivers/pinctrl/qcom/pinctrl-sdm670.c | 1 - drivers/pinctrl/qcom/pinctrl-sdm845.c | 1 - drivers/pinctrl/qcom/pinctrl-sdx55.c | 1 - drivers/pinctrl/qcom/pinctrl-sdx65.c | 1 - drivers/pinctrl/qcom/pinctrl-sdx75.c | 1 - drivers/pinctrl/qcom/pinctrl-sm4450.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6115.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6125.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6350.c | 1 - drivers/pinctrl/qcom/pinctrl-sm6375.c | 1 - drivers/pinctrl/qcom/pinctrl-sm7150.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8150.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8250.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8350.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8450.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8550.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8650.c | 1 - drivers/pinctrl/qcom/pinctrl-sm8750.c | 1 - drivers/pinctrl/qcom/pinctrl-x1e80100.c | 1 - 58 files changed, 1 insertion(+), 67 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-apq8064.c b/drivers/pinctrl/qcom/pinctrl-apq8064.c index 20c3b9025044..3654913f1ae5 100644 --- a/drivers/pinctrl/qcom/pinctrl-apq8064.c +++ b/drivers/pinctrl/qcom/pinctrl-apq8064.c @@ -629,7 +629,6 @@ static struct platform_driver apq8064_pinctrl_driver = { .of_match_table = apq8064_pinctrl_of_match, }, .probe = apq8064_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init apq8064_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-apq8084.c b/drivers/pinctrl/qcom/pinctrl-apq8084.c index 3fc0a40762b6..27693cd64881 100644 --- a/drivers/pinctrl/qcom/pinctrl-apq8084.c +++ b/drivers/pinctrl/qcom/pinctrl-apq8084.c @@ -1207,7 +1207,6 @@ static struct platform_driver apq8084_pinctrl_driver = { .of_match_table = apq8084_pinctrl_of_match, }, .probe = apq8084_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init apq8084_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq4019.c b/drivers/pinctrl/qcom/pinctrl-ipq4019.c index 1f7944dd829d..6ede3149b6e1 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq4019.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq4019.c @@ -710,7 +710,6 @@ static struct platform_driver ipq4019_pinctrl_driver = { .of_match_table = ipq4019_pinctrl_of_match, }, .probe = ipq4019_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq4019_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5018.c b/drivers/pinctrl/qcom/pinctrl-ipq5018.c index e2951f81c3ee..10b99d5d8a11 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5018.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5018.c @@ -754,7 +754,6 @@ static struct platform_driver ipq5018_pinctrl_driver = { .of_match_table = ipq5018_pinctrl_of_match, }, .probe = ipq5018_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5018_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5332.c b/drivers/pinctrl/qcom/pinctrl-ipq5332.c index 625f8014051f..1ac2fc09c119 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5332.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5332.c @@ -834,7 +834,6 @@ static struct platform_driver ipq5332_pinctrl_driver = { .of_match_table = ipq5332_pinctrl_of_match, }, .probe = ipq5332_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5332_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq5424.c b/drivers/pinctrl/qcom/pinctrl-ipq5424.c index 0d610b076da3..7ff1f8acc1a3 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq5424.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq5424.c @@ -791,7 +791,6 @@ static struct platform_driver ipq5424_pinctrl_driver = { .of_match_table = ipq5424_pinctrl_of_match, }, .probe = ipq5424_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq5424_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq6018.c b/drivers/pinctrl/qcom/pinctrl-ipq6018.c index 0ad08647dbcd..a4ba980252e1 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq6018.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq6018.c @@ -1080,7 +1080,6 @@ static struct platform_driver ipq6018_pinctrl_driver = { .of_match_table = ipq6018_pinctrl_of_match, }, .probe = ipq6018_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq6018_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8064.c b/drivers/pinctrl/qcom/pinctrl-ipq8064.c index e2bb94e86aef..0a9e357e64c6 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq8064.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq8064.c @@ -631,7 +631,6 @@ static struct platform_driver ipq8064_pinctrl_driver = { .of_match_table = ipq8064_pinctrl_of_match, }, .probe = ipq8064_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq8064_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq8074.c b/drivers/pinctrl/qcom/pinctrl-ipq8074.c index 337f3a1c92c1..482f13282fc2 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq8074.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq8074.c @@ -1041,7 +1041,6 @@ static struct platform_driver ipq8074_pinctrl_driver = { .of_match_table = ipq8074_pinctrl_of_match, }, .probe = ipq8074_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq8074_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-ipq9574.c b/drivers/pinctrl/qcom/pinctrl-ipq9574.c index e2491617b236..89c05d8eb550 100644 --- a/drivers/pinctrl/qcom/pinctrl-ipq9574.c +++ b/drivers/pinctrl/qcom/pinctrl-ipq9574.c @@ -799,7 +799,6 @@ static struct platform_driver ipq9574_pinctrl_driver = { .of_match_table = ipq9574_pinctrl_of_match, }, .probe = ipq9574_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init ipq9574_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9607.c b/drivers/pinctrl/qcom/pinctrl-mdm9607.c index e7cd3ef1cf3e..3e18ba124fed 100644 --- a/drivers/pinctrl/qcom/pinctrl-mdm9607.c +++ b/drivers/pinctrl/qcom/pinctrl-mdm9607.c @@ -1059,7 +1059,6 @@ static struct platform_driver mdm9607_pinctrl_driver = { .of_match_table = mdm9607_pinctrl_of_match, }, .probe = mdm9607_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init mdm9607_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-mdm9615.c b/drivers/pinctrl/qcom/pinctrl-mdm9615.c index 0a2ae383d3d5..bea1ca3d1b7f 100644 --- a/drivers/pinctrl/qcom/pinctrl-mdm9615.c +++ b/drivers/pinctrl/qcom/pinctrl-mdm9615.c @@ -446,7 +446,6 @@ static struct platform_driver mdm9615_pinctrl_driver = { .of_match_table = mdm9615_pinctrl_of_match, }, .probe = mdm9615_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init mdm9615_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f012ea88aa22..5c4687de1464 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -1442,7 +1442,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) girq->handler = handle_bad_irq; girq->parents[0] = pctrl->irq; - ret = gpiochip_add_data(&pctrl->chip, pctrl); + ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { dev_err(pctrl->dev, "Failed register gpiochip\n"); return ret; @@ -1463,7 +1463,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) dev_name(pctrl->dev), 0, 0, chip->ngpio); if (ret) { dev_err(pctrl->dev, "Failed to add pin range\n"); - gpiochip_remove(&pctrl->chip); return ret; } } @@ -1599,13 +1598,5 @@ int msm_pinctrl_probe(struct platform_device *pdev, } EXPORT_SYMBOL(msm_pinctrl_probe); -void msm_pinctrl_remove(struct platform_device *pdev) -{ - struct msm_pinctrl *pctrl = platform_get_drvdata(pdev); - - gpiochip_remove(&pctrl->chip); -} -EXPORT_SYMBOL(msm_pinctrl_remove); - MODULE_DESCRIPTION("Qualcomm Technologies, Inc. TLMM driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h index 63852ed70295..d7dc0947bb16 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.h +++ b/drivers/pinctrl/qcom/pinctrl-msm.h @@ -171,6 +171,5 @@ extern const struct dev_pm_ops msm_pinctrl_dev_pm_ops; int msm_pinctrl_probe(struct platform_device *pdev, const struct msm_pinctrl_soc_data *soc_data); -void msm_pinctrl_remove(struct platform_device *pdev); #endif diff --git a/drivers/pinctrl/qcom/pinctrl-msm8226.c b/drivers/pinctrl/qcom/pinctrl-msm8226.c index 64fee70f1772..f9a957347340 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8226.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8226.c @@ -654,7 +654,6 @@ static struct platform_driver msm8226_pinctrl_driver = { .of_match_table = msm8226_pinctrl_of_match, }, .probe = msm8226_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8226_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8660.c b/drivers/pinctrl/qcom/pinctrl-msm8660.c index 999a5f867eb5..4dbc19ffd80e 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8660.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8660.c @@ -981,7 +981,6 @@ static struct platform_driver msm8660_pinctrl_driver = { .of_match_table = msm8660_pinctrl_of_match, }, .probe = msm8660_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8660_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8909.c b/drivers/pinctrl/qcom/pinctrl-msm8909.c index 756856d20d6b..0aa4f77b774f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8909.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8909.c @@ -929,7 +929,6 @@ static struct platform_driver msm8909_pinctrl_driver = { .of_match_table = msm8909_pinctrl_of_match, }, .probe = msm8909_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8909_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8916.c b/drivers/pinctrl/qcom/pinctrl-msm8916.c index cea5c54f92fe..0dfc6dd33d58 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8916.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8916.c @@ -969,7 +969,6 @@ static struct platform_driver msm8916_pinctrl_driver = { .of_match_table = msm8916_pinctrl_of_match, }, .probe = msm8916_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8916_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8917.c b/drivers/pinctrl/qcom/pinctrl-msm8917.c index 350636807b07..2e1a94ab18b2 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8917.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8917.c @@ -1607,7 +1607,6 @@ static struct platform_driver msm8917_pinctrl_driver = { .of_match_table = msm8917_pinctrl_of_match, }, .probe = msm8917_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8917_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8953.c b/drivers/pinctrl/qcom/pinctrl-msm8953.c index 998351bdfee1..956383341a7a 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8953.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8953.c @@ -1816,7 +1816,6 @@ static struct platform_driver msm8953_pinctrl_driver = { .of_match_table = msm8953_pinctrl_of_match, }, .probe = msm8953_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8953_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8960.c b/drivers/pinctrl/qcom/pinctrl-msm8960.c index ebe230b3b437..a937ea867de7 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8960.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8960.c @@ -1246,7 +1246,6 @@ static struct platform_driver msm8960_pinctrl_driver = { .of_match_table = msm8960_pinctrl_of_match, }, .probe = msm8960_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8960_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8976.c b/drivers/pinctrl/qcom/pinctrl-msm8976.c index c30d80e4e98c..3bcb03387781 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8976.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8976.c @@ -1096,7 +1096,6 @@ static struct platform_driver msm8976_pinctrl_driver = { .of_match_table = msm8976_pinctrl_of_match, }, .probe = msm8976_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8976_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8994.c b/drivers/pinctrl/qcom/pinctrl-msm8994.c index b1a6759ab4a5..7a3b6cbccb68 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8994.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8994.c @@ -1343,7 +1343,6 @@ static struct platform_driver msm8994_pinctrl_driver = { .of_match_table = msm8994_pinctrl_of_match, }, .probe = msm8994_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8994_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8996.c b/drivers/pinctrl/qcom/pinctrl-msm8996.c index 1b5d80eaab83..d86d83106d3b 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8996.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8996.c @@ -1920,7 +1920,6 @@ static struct platform_driver msm8996_pinctrl_driver = { .of_match_table = msm8996_pinctrl_of_match, }, .probe = msm8996_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8996_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8998.c b/drivers/pinctrl/qcom/pinctrl-msm8998.c index b7cbf32b3125..1daee815888f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8998.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8998.c @@ -1535,7 +1535,6 @@ static struct platform_driver msm8998_pinctrl_driver = { .of_match_table = msm8998_pinctrl_of_match, }, .probe = msm8998_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8998_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-msm8x74.c b/drivers/pinctrl/qcom/pinctrl-msm8x74.c index 238c83f6ec4f..8253aa25775b 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm8x74.c +++ b/drivers/pinctrl/qcom/pinctrl-msm8x74.c @@ -1083,7 +1083,6 @@ static struct platform_driver msm8x74_pinctrl_driver = { .of_match_table = msm8x74_pinctrl_of_match, }, .probe = msm8x74_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init msm8x74_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c index f885af571ec9..85c951305abd 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c +++ b/drivers/pinctrl/qcom/pinctrl-qcm2290.c @@ -1125,7 +1125,6 @@ static struct platform_driver qcm2290_pinctrl_driver = { .of_match_table = qcm2290_pinctrl_of_match, }, .probe = qcm2290_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcm2290_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs404.c b/drivers/pinctrl/qcom/pinctrl-qcs404.c index ae7224012f8a..54e3b4435349 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs404.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs404.c @@ -1644,7 +1644,6 @@ static struct platform_driver qcs404_pinctrl_driver = { .of_match_table = qcs404_pinctrl_of_match, }, .probe = qcs404_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs404_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs615.c b/drivers/pinctrl/qcom/pinctrl-qcs615.c index 17ca743c2210..2a943bc46a62 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs615.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs615.c @@ -1087,7 +1087,6 @@ static struct platform_driver qcs615_tlmm_driver = { .of_match_table = qcs615_tlmm_of_match, }, .probe = qcs615_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs615_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qcs8300.c b/drivers/pinctrl/qcom/pinctrl-qcs8300.c index 5f5f7c4ac644..d6437e26392b 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcs8300.c +++ b/drivers/pinctrl/qcom/pinctrl-qcs8300.c @@ -1227,7 +1227,6 @@ static struct platform_driver qcs8300_pinctrl_driver = { .of_match_table = qcs8300_pinctrl_of_match, }, .probe = qcs8300_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qcs8300_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c index b5808fcfb13c..9ecc4d40e4dc 100644 --- a/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c +++ b/drivers/pinctrl/qcom/pinctrl-qdf2xxx.c @@ -145,7 +145,6 @@ static struct platform_driver qdf2xxx_pinctrl_driver = { .acpi_match_table = qdf2xxx_acpi_ids, }, .probe = qdf2xxx_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init qdf2xxx_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-qdu1000.c b/drivers/pinctrl/qcom/pinctrl-qdu1000.c index 47bc529ef550..eacb89fa3888 100644 --- a/drivers/pinctrl/qcom/pinctrl-qdu1000.c +++ b/drivers/pinctrl/qcom/pinctrl-qdu1000.c @@ -1248,7 +1248,6 @@ static struct platform_driver qdu1000_tlmm_driver = { .of_match_table = qdu1000_tlmm_of_match, }, .probe = qdu1000_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init qdu1000_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sa8775p.c b/drivers/pinctrl/qcom/pinctrl-sa8775p.c index a5b38221aea8..1b62eb3e6620 100644 --- a/drivers/pinctrl/qcom/pinctrl-sa8775p.c +++ b/drivers/pinctrl/qcom/pinctrl-sa8775p.c @@ -1540,7 +1540,6 @@ static struct platform_driver sa8775p_pinctrl_driver = { .of_match_table = sa8775p_pinctrl_of_match, }, .probe = sa8775p_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sa8775p_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sar2130p.c b/drivers/pinctrl/qcom/pinctrl-sar2130p.c index 19a2e37826c7..3dd1b5e5cfee 100644 --- a/drivers/pinctrl/qcom/pinctrl-sar2130p.c +++ b/drivers/pinctrl/qcom/pinctrl-sar2130p.c @@ -1486,7 +1486,6 @@ static struct platform_driver sar2130p_tlmm_driver = { .of_match_table = sar2130p_tlmm_of_match, }, .probe = sar2130p_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sar2130p_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc7180.c b/drivers/pinctrl/qcom/pinctrl-sc7180.c index 6eb0c73791c0..c43fe10b71ad 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc7180.c +++ b/drivers/pinctrl/qcom/pinctrl-sc7180.c @@ -1159,7 +1159,6 @@ static struct platform_driver sc7180_pinctrl_driver = { .of_match_table = sc7180_pinctrl_of_match, }, .probe = sc7180_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc7180_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc7280.c b/drivers/pinctrl/qcom/pinctrl-sc7280.c index 0c10eeb60b55..1b070e9d41f5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc7280.c +++ b/drivers/pinctrl/qcom/pinctrl-sc7280.c @@ -1505,7 +1505,6 @@ static struct platform_driver sc7280_pinctrl_driver = { .of_match_table = sc7280_pinctrl_of_match, }, .probe = sc7280_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc7280_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc8180x.c b/drivers/pinctrl/qcom/pinctrl-sc8180x.c index d6a79ad41a40..26dd165d1543 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc8180x.c +++ b/drivers/pinctrl/qcom/pinctrl-sc8180x.c @@ -1720,7 +1720,6 @@ static struct platform_driver sc8180x_pinctrl_driver = { .acpi_match_table = sc8180x_pinctrl_acpi_match, }, .probe = sc8180x_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc8180x_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c index 96f4fb5a5d29..6ccd7e5648d4 100644 --- a/drivers/pinctrl/qcom/pinctrl-sc8280xp.c +++ b/drivers/pinctrl/qcom/pinctrl-sc8280xp.c @@ -1926,7 +1926,6 @@ static struct platform_driver sc8280xp_pinctrl_driver = { .of_match_table = sc8280xp_pinctrl_of_match, }, .probe = sc8280xp_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sc8280xp_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm660.c b/drivers/pinctrl/qcom/pinctrl-sdm660.c index 907e4ffca5e7..1a78288f1bc8 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm660.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm660.c @@ -1442,7 +1442,6 @@ static struct platform_driver sdm660_pinctrl_driver = { .of_match_table = sdm660_pinctrl_of_match, }, .probe = sdm660_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm660_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm670.c b/drivers/pinctrl/qcom/pinctrl-sdm670.c index c76183ba95e1..0fe1fa94cd6d 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm670.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm670.c @@ -1337,7 +1337,6 @@ static struct platform_driver sdm670_pinctrl_driver = { .of_match_table = sdm670_pinctrl_of_match, }, .probe = sdm670_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm670_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdm845.c b/drivers/pinctrl/qcom/pinctrl-sdm845.c index cc05c415ed15..0446e291aa48 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm845.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm845.c @@ -1351,7 +1351,6 @@ static struct platform_driver sdm845_pinctrl_driver = { .acpi_match_table = ACPI_PTR(sdm845_pinctrl_acpi_match), }, .probe = sdm845_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdm845_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx55.c b/drivers/pinctrl/qcom/pinctrl-sdx55.c index 8826db9d21d0..2c17bf889146 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx55.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx55.c @@ -990,7 +990,6 @@ static struct platform_driver sdx55_pinctrl_driver = { .of_match_table = sdx55_pinctrl_of_match, }, .probe = sdx55_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx55_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx65.c b/drivers/pinctrl/qcom/pinctrl-sdx65.c index f6f319c997fc..85b5c0206dbd 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx65.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx65.c @@ -939,7 +939,6 @@ static struct platform_driver sdx65_pinctrl_driver = { .of_match_table = sdx65_pinctrl_of_match, }, .probe = sdx65_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx65_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sdx75.c b/drivers/pinctrl/qcom/pinctrl-sdx75.c index 3cfe8c7f04df..ab13a3a57a83 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdx75.c +++ b/drivers/pinctrl/qcom/pinctrl-sdx75.c @@ -1124,7 +1124,6 @@ static struct platform_driver sdx75_pinctrl_driver = { .of_match_table = sdx75_pinctrl_of_match, }, .probe = sdx75_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sdx75_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm4450.c b/drivers/pinctrl/qcom/pinctrl-sm4450.c index 622f20e6f6f8..1ecdf1ab4f27 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm4450.c +++ b/drivers/pinctrl/qcom/pinctrl-sm4450.c @@ -994,7 +994,6 @@ static struct platform_driver sm4450_tlmm_driver = { .of_match_table = sm4450_tlmm_of_match, }, .probe = sm4450_tlmm_probe, - .remove = msm_pinctrl_remove, }; MODULE_DEVICE_TABLE(of, sm4450_tlmm_of_match); diff --git a/drivers/pinctrl/qcom/pinctrl-sm6115.c b/drivers/pinctrl/qcom/pinctrl-sm6115.c index 4e91c75ad952..c273efa43996 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6115.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6115.c @@ -907,7 +907,6 @@ static struct platform_driver sm6115_tlmm_driver = { .of_match_table = sm6115_tlmm_of_match, }, .probe = sm6115_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6115_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6125.c b/drivers/pinctrl/qcom/pinctrl-sm6125.c index c188842047aa..5092f20e0c1b 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6125.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6125.c @@ -1266,7 +1266,6 @@ static struct platform_driver sm6125_tlmm_driver = { .of_match_table = sm6125_tlmm_of_match, }, .probe = sm6125_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6125_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6350.c b/drivers/pinctrl/qcom/pinctrl-sm6350.c index f3828c07b134..ba4686c86c54 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6350.c @@ -1373,7 +1373,6 @@ static struct platform_driver sm6350_tlmm_driver = { .of_match_table = sm6350_tlmm_of_match, }, .probe = sm6350_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6350_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm6375.c b/drivers/pinctrl/qcom/pinctrl-sm6375.c index c82c8516932e..49031571e65e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm6375.c +++ b/drivers/pinctrl/qcom/pinctrl-sm6375.c @@ -1516,7 +1516,6 @@ static struct platform_driver sm6375_tlmm_driver = { .of_match_table = sm6375_tlmm_of_match, }, .probe = sm6375_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm6375_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm7150.c b/drivers/pinctrl/qcom/pinctrl-sm7150.c index 3c7fd8af6635..6e89966cd70e 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm7150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm7150.c @@ -1255,7 +1255,6 @@ static struct platform_driver sm7150_tlmm_driver = { .of_match_table = sm7150_tlmm_of_match, }, .probe = sm7150_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm7150_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8150.c b/drivers/pinctrl/qcom/pinctrl-sm8150.c index 01aea9c70b7a..794ed99463f7 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8150.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8150.c @@ -1542,7 +1542,6 @@ static struct platform_driver sm8150_pinctrl_driver = { .of_match_table = sm8150_pinctrl_of_match, }, .probe = sm8150_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8150_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c index e9961a49ff98..fb6f005d64f5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8250.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c @@ -1351,7 +1351,6 @@ static struct platform_driver sm8250_pinctrl_driver = { .of_match_table = sm8250_pinctrl_of_match, }, .probe = sm8250_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8250_pinctrl_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8350.c b/drivers/pinctrl/qcom/pinctrl-sm8350.c index 9c69458bd910..c8a3f39ce6f1 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8350.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8350.c @@ -1642,7 +1642,6 @@ static struct platform_driver sm8350_tlmm_driver = { .of_match_table = sm8350_tlmm_of_match, }, .probe = sm8350_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8350_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8450.c b/drivers/pinctrl/qcom/pinctrl-sm8450.c index d11bb1ee9e3d..f2e52d5a0f93 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8450.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8450.c @@ -1677,7 +1677,6 @@ static struct platform_driver sm8450_tlmm_driver = { .of_match_table = sm8450_tlmm_of_match, }, .probe = sm8450_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8450_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8550.c b/drivers/pinctrl/qcom/pinctrl-sm8550.c index 3c847d9cb5d9..1b4496cb39eb 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8550.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8550.c @@ -1762,7 +1762,6 @@ static struct platform_driver sm8550_tlmm_driver = { .of_match_table = sm8550_tlmm_of_match, }, .probe = sm8550_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8550_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8650.c b/drivers/pinctrl/qcom/pinctrl-sm8650.c index 104708252d12..449a0077f4b1 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8650.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8650.c @@ -1742,7 +1742,6 @@ static struct platform_driver sm8650_tlmm_driver = { .of_match_table = sm8650_tlmm_of_match, }, .probe = sm8650_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8650_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8750.c b/drivers/pinctrl/qcom/pinctrl-sm8750.c index b94fb4ee0ec3..8516693d1db5 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8750.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8750.c @@ -1711,7 +1711,6 @@ static struct platform_driver sm8750_tlmm_driver = { .of_match_table = sm8750_tlmm_of_match, }, .probe = sm8750_tlmm_probe, - .remove = msm_pinctrl_remove, }; static int __init sm8750_tlmm_init(void) diff --git a/drivers/pinctrl/qcom/pinctrl-x1e80100.c b/drivers/pinctrl/qcom/pinctrl-x1e80100.c index 419cb8facb2f..d4b215f34c39 100644 --- a/drivers/pinctrl/qcom/pinctrl-x1e80100.c +++ b/drivers/pinctrl/qcom/pinctrl-x1e80100.c @@ -1861,7 +1861,6 @@ static struct platform_driver x1e80100_pinctrl_driver = { .of_match_table = x1e80100_pinctrl_of_match, }, .probe = x1e80100_pinctrl_probe, - .remove = msm_pinctrl_remove, }; static int __init x1e80100_pinctrl_init(void) From 315345610faee8a0568b522dba9e35067d1732ab Mon Sep 17 00:00:00 2001 From: Wojciech Slenska Date: Fri, 23 May 2025 12:14:37 +0200 Subject: [PATCH 067/497] pinctrl: qcom: pinctrl-qcm2290: Add missing pins Added the missing pins to the qcm2290_pins table. Signed-off-by: Wojciech Slenska Fixes: 48e049ef1238 ("pinctrl: qcom: Add QCM2290 pinctrl driver") Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/20250523101437.59092-1-wojciech.slenska@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-qcm2290.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-qcm2290.c b/drivers/pinctrl/qcom/pinctrl-qcm2290.c index 85c951305abd..eeeec6434f6a 100644 --- a/drivers/pinctrl/qcom/pinctrl-qcm2290.c +++ b/drivers/pinctrl/qcom/pinctrl-qcm2290.c @@ -167,6 +167,10 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(62, "GPIO_62"), PINCTRL_PIN(63, "GPIO_63"), PINCTRL_PIN(64, "GPIO_64"), + PINCTRL_PIN(65, "GPIO_65"), + PINCTRL_PIN(66, "GPIO_66"), + PINCTRL_PIN(67, "GPIO_67"), + PINCTRL_PIN(68, "GPIO_68"), PINCTRL_PIN(69, "GPIO_69"), PINCTRL_PIN(70, "GPIO_70"), PINCTRL_PIN(71, "GPIO_71"), @@ -181,12 +185,17 @@ static const struct pinctrl_pin_desc qcm2290_pins[] = { PINCTRL_PIN(80, "GPIO_80"), PINCTRL_PIN(81, "GPIO_81"), PINCTRL_PIN(82, "GPIO_82"), + PINCTRL_PIN(83, "GPIO_83"), + PINCTRL_PIN(84, "GPIO_84"), + PINCTRL_PIN(85, "GPIO_85"), PINCTRL_PIN(86, "GPIO_86"), PINCTRL_PIN(87, "GPIO_87"), PINCTRL_PIN(88, "GPIO_88"), PINCTRL_PIN(89, "GPIO_89"), PINCTRL_PIN(90, "GPIO_90"), PINCTRL_PIN(91, "GPIO_91"), + PINCTRL_PIN(92, "GPIO_92"), + PINCTRL_PIN(93, "GPIO_93"), PINCTRL_PIN(94, "GPIO_94"), PINCTRL_PIN(95, "GPIO_95"), PINCTRL_PIN(96, "GPIO_96"), From fcd65d65fd85dbde090ef8c2615760ebc5ccf9e3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 May 2025 11:22:02 +0200 Subject: [PATCH 068/497] pinctrl: st: Drop unused st_gpio_bank() function Static inline st_gpio_bank() function is not referenced: pinctrl-st.c:377:19: error: unused function 'st_gpio_bank' [-Werror,-Wunused-function] Fixes: 701016c0cba5 ("pinctrl: st: Add pinctrl and pinconf support.") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250528092201.52132-2-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-st.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index fe2d52e434db..8a2ef74862d3 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -374,11 +374,6 @@ static struct st_pio_control *st_get_pio_control( } /* Low level functions.. */ -static inline int st_gpio_bank(int gpio) -{ - return gpio/ST_GPIO_PINS_PER_BANK; -} - static inline int st_gpio_pin(int gpio) { return gpio%ST_GPIO_PINS_PER_BANK; From d38e00c417e1ca0c586802e47ad7d54347c91f67 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 28 May 2025 12:45:15 +0200 Subject: [PATCH 069/497] pinctrl: MAINTAINERS: Drop bouncing Jianlong Huang Emails to Jianlong Huang bounce since 9 months, so drop the person from maintainers: 550 5.4.1 Recipient address rejected: Access denied. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250528104514.184122-2-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- .../bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml | 2 +- .../bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml | 2 +- MAINTAINERS | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml index b470901f5f56..4dbef86bd958 100644 --- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-aon-pinctrl.yaml @@ -15,7 +15,7 @@ description: | Some peripherals such as PWM have their I/O go through the 4 "GPIOs". maintainers: - - Jianlong Huang + - Hal Feng properties: compatible: diff --git a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml index 222b9e240f8a..e2a25a20f6a6 100644 --- a/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml +++ b/Documentation/devicetree/bindings/pinctrl/starfive,jh7110-sys-pinctrl.yaml @@ -18,7 +18,7 @@ description: | any GPIO can be set up to be controlled by any of the peripherals. maintainers: - - Jianlong Huang + - Hal Feng properties: compatible: diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..4f17af5d7240 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -23661,7 +23661,6 @@ F: include/dt-bindings/clock/starfive?jh71*.h STARFIVE JH71X0 PINCTRL DRIVERS M: Emil Renner Berthing -M: Jianlong Huang M: Hal Feng L: linux-gpio@vger.kernel.org S: Maintained From 24b0277c1c539cd41539d9297baafc62df04464a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 1 Jun 2025 12:51:01 +0200 Subject: [PATCH 070/497] pinctrl: tb10x: Drop of_match_ptr for ID table The driver can match only via the DT table so the table should be always used and the of_match_ptr does not have any sense (this also allows ACPI matching via PRP0001, even though it might not be relevant here). This also fixes !CONFIG_OF warning: pinctrl-tb10x.c:815:34: warning: unused variable 'tb10x_pinctrl_dt_ids' [-Wunused-const-variable] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202505301317.EI1caRC0-lkp@intel.com/ Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/20250601105100.27927-2-krzysztof.kozlowski@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-tb10x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index d6bb8f58978d..4edb20e61951 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -823,7 +823,7 @@ static struct platform_driver tb10x_pinctrl_pdrv = { .remove = tb10x_pinctrl_remove, .driver = { .name = "tb10x_pinctrl", - .of_match_table = of_match_ptr(tb10x_pinctrl_dt_ids), + .of_match_table = tb10x_pinctrl_dt_ids, } }; From e51a086117ed857ea455c9ea774dbfb82f53e517 Mon Sep 17 00:00:00 2001 From: Andres Urian Florez Date: Sun, 8 Jun 2025 18:04:21 -0500 Subject: [PATCH 071/497] spi: offload: check offload ops existence before disabling the trigger Add a safe guard in spi_offload_trigger to check the existence of offload->ops before invoking the trigger_disable callback Signed-off-by: Andres Urian Florez Link: https://patch.msgid.link/20250608230422.325360-1-andres.emb.sys@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-offload.c b/drivers/spi/spi-offload.c index e674097bf3be..d336f4d228d5 100644 --- a/drivers/spi/spi-offload.c +++ b/drivers/spi/spi-offload.c @@ -297,7 +297,7 @@ int spi_offload_trigger_enable(struct spi_offload *offload, if (trigger->ops->enable) { ret = trigger->ops->enable(trigger, config); if (ret) { - if (offload->ops->trigger_disable) + if (offload->ops && offload->ops->trigger_disable) offload->ops->trigger_disable(offload); return ret; } From 779a0c9e06a91d0007f2a4f4071e3b9a8102b15e Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Wed, 4 Jun 2025 07:32:17 -0700 Subject: [PATCH 072/497] accel/amdxdna: Fix incorrect PSP firmware size The incorrect PSP firmware size is used for initializing. It may cause error for newer version firmware. Fixes: 8c9ff1b181ba ("accel/amdxdna: Add a new driver for AMD AI Engine") Acked-by: Alex Deucher Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20250604143217.1386272-1-lizhi.hou@amd.com --- drivers/accel/amdxdna/aie2_psp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c index dc3a072ce3b6..f28a060a8810 100644 --- a/drivers/accel/amdxdna/aie2_psp.c +++ b/drivers/accel/amdxdna/aie2_psp.c @@ -126,8 +126,8 @@ struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config * psp->ddev = ddev; memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs)); - psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN; - psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL); + psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN); + psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz + PSP_FW_ALIGN, GFP_KERNEL); if (!psp->fw_buffer) { drm_err(ddev, "no memory for fw buffer"); return NULL; From d6fb4f01736a1d18cc981eb04fa2907a7121fc27 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 21 May 2025 11:01:02 +0200 Subject: [PATCH 073/497] drm/xe/svm: Fix regression disallowing 64K SVM migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When changing the condition from >= SZ_64K, it was changed to <= SZ_64K. This disallows migration of 64K, which is the exact minimum allowed. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/5057 Fixes: 794f5493f518 ("drm/xe: Strict migration policy for atomic SVM faults") Cc: stable@vger.kernel.org Cc: Matthew Brost Cc: Himal Prasad Ghimiray Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Maarten Lankhorst Link: https://lore.kernel.org/r/20250521090102.2965100-1-dev@lankhorst.se (cherry picked from commit 531bef26d189b28bf0d694878c0e064b30990b6c) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 6345896585de..f0b167b3fb6a 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -764,7 +764,7 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, return false; } - if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) { + if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) { drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n"); return false; } From cf2c3eceb757e3f28e6f1034f9bc178e1535f5cc Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Mon, 9 Jun 2025 17:05:04 +0200 Subject: [PATCH 074/497] spi: stm32-ospi: Make usage of reset_control_acquire/release() API As ospi reset is consumed by both OMM and OSPI drivers, use the reset acquire/release mechanism which ensure exclusive reset usage. This avoid to call reset_control_get/put() in OMM driver each time we need to reset OSPI children and guarantee the reset line stays deasserted. During resume, OMM driver takes temporarily control of reset. Fixes: 79b8a705e26c ("spi: stm32: Add OSPI driver") Signed-off-by: Patrice Chotard Reviewed-by: Philipp Zabel Link: https://patch.msgid.link/20250609-b4-upstream_ospi_reset_update-v6-1-5b602b567e8a@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-ospi.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index 7c1fa55fbc47..db6b1cfc970f 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -804,7 +804,7 @@ static int stm32_ospi_get_resources(struct platform_device *pdev) return ret; } - ospi->rstc = devm_reset_control_array_get_exclusive(dev); + ospi->rstc = devm_reset_control_array_get_exclusive_released(dev); if (IS_ERR(ospi->rstc)) return dev_err_probe(dev, PTR_ERR(ospi->rstc), "Can't get reset\n"); @@ -936,11 +936,13 @@ static int stm32_ospi_probe(struct platform_device *pdev) if (ret < 0) goto err_pm_enable; - if (ospi->rstc) { - reset_control_assert(ospi->rstc); - udelay(2); - reset_control_deassert(ospi->rstc); - } + ret = reset_control_acquire(ospi->rstc); + if (ret) + return dev_err_probe(dev, ret, "Can not acquire reset %d\n", ret); + + reset_control_assert(ospi->rstc); + udelay(2); + reset_control_deassert(ospi->rstc); ret = spi_register_controller(ctrl); if (ret) { @@ -987,6 +989,8 @@ static void stm32_ospi_remove(struct platform_device *pdev) if (ospi->dma_chrx) dma_release_channel(ospi->dma_chrx); + reset_control_release(ospi->rstc); + pm_runtime_put_sync_suspend(ospi->dev); pm_runtime_force_suspend(ospi->dev); } @@ -997,6 +1001,8 @@ static int __maybe_unused stm32_ospi_suspend(struct device *dev) pinctrl_pm_select_sleep_state(dev); + reset_control_release(ospi->rstc); + return pm_runtime_force_suspend(ospi->dev); } @@ -1016,6 +1022,12 @@ static int __maybe_unused stm32_ospi_resume(struct device *dev) if (ret < 0) return ret; + ret = reset_control_acquire(ospi->rstc); + if (ret) { + dev_err(dev, "Can not acquire reset\n"); + return ret; + } + writel_relaxed(ospi->cr_reg, regs_base + OSPI_CR); writel_relaxed(ospi->dcr_reg, regs_base + OSPI_DCR1); pm_runtime_mark_last_busy(ospi->dev); From e34dbbc85d64af59176fe59fad7b4122f4330fe2 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Mon, 9 Jun 2025 01:40:53 -0700 Subject: [PATCH 075/497] x86/fred/signal: Prevent immediate repeat of single step trap on return from SIGTRAP handler Clear the software event flag in the augmented SS to prevent immediate repeat of single step trap on return from SIGTRAP handler if the trap flag (TF) is set without an external debugger attached. Following is a typical single-stepping flow for a user process: 1) The user process is prepared for single-stepping by setting RFLAGS.TF = 1. 2) When any instruction in user space completes, a #DB is triggered. 3) The kernel handles the #DB and returns to user space, invoking the SIGTRAP handler with RFLAGS.TF = 0. 4) After the SIGTRAP handler finishes, the user process performs a sigreturn syscall, restoring the original state, including RFLAGS.TF = 1. 5) Goto step 2. According to the FRED specification: A) Bit 17 in the augmented SS is designated as the software event flag, which is set to 1 for FRED event delivery of SYSCALL, SYSENTER, or INT n. B) If bit 17 of the augmented SS is 1 and ERETU would result in RFLAGS.TF = 1, a single-step trap will be pending upon completion of ERETU. In step 4) above, the software event flag is set upon the sigreturn syscall, and its corresponding ERETU would restore RFLAGS.TF = 1. This combination causes a pending single-step trap upon completion of ERETU. Therefore, another #DB is triggered before any user space instruction is executed, which leads to an infinite loop in which the SIGTRAP handler keeps being invoked on the same user space IP. Fixes: 14619d912b65 ("x86/fred: FRED entry/exit and dispatch code") Suggested-by: H. Peter Anvin (Intel) Signed-off-by: Xin Li (Intel) Signed-off-by: Dave Hansen Tested-by: Sohil Mehta Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250609084054.2083189-2-xin%40zytor.com --- arch/x86/include/asm/sighandling.h | 22 ++++++++++++++++++++++ arch/x86/kernel/signal_32.c | 4 ++++ arch/x86/kernel/signal_64.c | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index e770c4fc47f4..8727c7e21dd1 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -24,4 +24,26 @@ int ia32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); int x32_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs); +/* + * To prevent immediate repeat of single step trap on return from SIGTRAP + * handler if the trap flag (TF) is set without an external debugger attached, + * clear the software event flag in the augmented SS, ensuring no single-step + * trap is pending upon ERETU completion. + * + * Note, this function should be called in sigreturn() before the original + * state is restored to make sure the TF is read from the entry frame. + */ +static __always_inline void prevent_single_step_upon_eretu(struct pt_regs *regs) +{ + /* + * If the trap flag (TF) is set, i.e., the sigreturn() SYSCALL instruction + * is being single-stepped, do not clear the software event flag in the + * augmented SS, thus a debugger won't skip over the following instruction. + */ +#ifdef CONFIG_X86_FRED + if (!(regs->flags & X86_EFLAGS_TF)) + regs->fred_ss.swevent = 0; +#endif +} + #endif /* _ASM_X86_SIGHANDLING_H */ diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 98123ff10506..42bbc42bd350 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -152,6 +152,8 @@ SYSCALL32_DEFINE0(sigreturn) struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; + prevent_single_step_upon_eretu(regs); + if (!access_ok(frame, sizeof(*frame))) goto badframe; if (__get_user(set.sig[0], &frame->sc.oldmask) @@ -175,6 +177,8 @@ SYSCALL32_DEFINE0(rt_sigreturn) struct rt_sigframe_ia32 __user *frame; sigset_t set; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); if (!access_ok(frame, sizeof(*frame))) diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ee9453891901..d483b585c6c6 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -250,6 +250,8 @@ SYSCALL_DEFINE0(rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(frame, sizeof(*frame))) goto badframe; @@ -366,6 +368,8 @@ COMPAT_SYSCALL_DEFINE0(x32_rt_sigreturn) sigset_t set; unsigned long uc_flags; + prevent_single_step_upon_eretu(regs); + frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); if (!access_ok(frame, sizeof(*frame))) From f287822688eeb44ae1cf6ac45701d965efc33218 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Mon, 9 Jun 2025 01:40:54 -0700 Subject: [PATCH 076/497] selftests/x86: Add a test to detect infinite SIGTRAP handler loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When FRED is enabled, if the Trap Flag (TF) is set without an external debugger attached, it can lead to an infinite loop in the SIGTRAP handler. To avoid this, the software event flag in the augmented SS must be cleared, ensuring that no single-step trap remains pending when ERETU completes. This test checks for that specific scenario—verifying whether the kernel correctly prevents an infinite SIGTRAP loop in this edge case when FRED is enabled. The test should _always_ pass with IDT event delivery, thus no need to disable the test even when FRED is not enabled. Signed-off-by: Xin Li (Intel) Signed-off-by: Dave Hansen Tested-by: Sohil Mehta Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250609084054.2083189-3-xin%40zytor.com --- tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/sigtrap_loop.c | 101 +++++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/x86/sigtrap_loop.c diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index f703fcfe9f7c..83148875a12c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vsyscall mov_ss_trap \ + test_vsyscall mov_ss_trap sigtrap_loop \ syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c new file mode 100644 index 000000000000..9d065479e89f --- /dev/null +++ b/tools/testing/selftests/x86/sigtrap_loop.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Intel Corporation + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include + +#ifdef __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + + return; +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + static unsigned int loop_count_on_same_ip; + static unsigned long last_trap_ip; + + if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) { + printf("\tTrapped at %016lx\n", last_trap_ip); + + /* + * If the same IP is hit more than 10 times in a row, it is + * _considered_ an infinite loop. + */ + if (++loop_count_on_same_ip > 10) { + printf("[FAIL]\tDetected SIGTRAP infinite loop\n"); + exit(1); + } + + return; + } + + loop_count_on_same_ip = 0; + last_trap_ip = ctx->uc_mcontext.gregs[REG_IP]; + printf("\tTrapped at %016lx\n", last_trap_ip); +} + +int main(int argc, char *argv[]) +{ + sethandler(SIGTRAP, sigtrap, 0); + + /* + * Set the Trap Flag (TF) to single-step the test code, therefore to + * trigger a SIGTRAP signal after each instruction until the TF is + * cleared. + * + * Because the arithmetic flags are not significant here, the TF is + * set by pushing 0x302 onto the stack and then popping it into the + * flags register. + * + * Four instructions in the following asm code are executed with the + * TF set, thus the SIGTRAP handler is expected to run four times. + */ + printf("[RUN]\tSIGTRAP infinite loop detection\n"); + asm volatile( +#ifdef __x86_64__ + /* + * Avoid clobbering the redzone + * + * Equivalent to "sub $128, %rsp", however -128 can be encoded + * in a single byte immediate while 128 uses 4 bytes. + */ + "add $-128, %rsp\n\t" +#endif + "push $0x302\n\t" + "popf\n\t" + "nop\n\t" + "nop\n\t" + "push $0x202\n\t" + "popf\n\t" +#ifdef __x86_64__ + "sub $-128, %rsp\n\t" +#endif + ); + + printf("[OK]\tNo SIGTRAP infinite loop detected\n"); + return 0; +} From e044b8a9545cd8265c7110c179aeec2624c16455 Mon Sep 17 00:00:00 2001 From: "Francesco Poli (wintermute)" Date: Wed, 21 May 2025 23:14:39 +0200 Subject: [PATCH 077/497] cpupower: split unitdir from libdir in Makefile Improve the installation procedure for the systemd service unit 'cpupower.service', to be more flexible. Some distros install libraries to /usr/lib64/, but systemd service units have to be installed to /usr/lib/systemd/system: as a consequence, the installation procedure should not assume that systemd service units can be installed to ${libdir}/systemd/system ... Define a dedicated variable ("unitdir") in the Makefile. Link: https://lore.kernel.org/linux-pm/260b6d79-ab61-43b7-a0eb-813e257bc028@leemhuis.info/T/#m0601940ab439d5cbd288819d2af190ce59e810e6 Fixes: 9c70b779ad91 ("cpupower: add a systemd service to run cpupower") Link: https://lore.kernel.org/r/20250521211656.65646-1-invernomuto@paranoici.org Signed-off-by: Francesco Poli (wintermute) Tested-by: Thorsten Leemhuis Signed-off-by: Shuah Khan --- tools/power/cpupower/Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index be8dfac14076..c43db1c41205 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -73,6 +73,7 @@ sbindir ?= /usr/sbin mandir ?= /usr/man libdir ?= /usr/lib libexecdir ?= /usr/libexec +unitdir ?= /usr/lib/systemd/system includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -309,9 +310,9 @@ install-tools: $(OUTPUT)cpupower $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}' $(INSTALL) -d $(DESTDIR)${libexecdir} $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower' - $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system - sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service' - $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service' + $(INSTALL) -d $(DESTDIR)${unitdir} + sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${unitdir}/cpupower.service' + $(SETPERM_DATA) '$(DESTDIR)${unitdir}/cpupower.service' install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -348,7 +349,7 @@ uninstall: - rm -f $(DESTDIR)${bindir}/utils/cpupower - rm -f $(DESTDIR)${confdir}cpupower-service.conf - rm -f $(DESTDIR)${libexecdir}/cpupower - - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service + - rm -f $(DESTDIR)${unitdir}/cpupower.service - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 From ea7caffedd011f7d40abe93a884ffbe46f122535 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 8 Apr 2025 14:22:56 -0300 Subject: [PATCH 078/497] ARC: atomics: Implement arch_atomic64_cmpxchg using _relaxed The core atomic code has a number of macros where it elaborates architecture primitives into more functions. ARC uses arch_atomic64_cmpxchg() as it's architecture primitive which disable alot of the additional functions. Instead provide arch_cmpxchg64_relaxed() as the primitive and rely on the core macros to create arch_cmpxchg64(). The macros will also provide other functions, for instance, try_cmpxchg64_release(), giving a more complete implementation. Suggested-by: Mark Rutland Link: https://lore.kernel.org/r/Z0747n5bSep4_1VX@J2N7QTR9R3 Signed-off-by: Jason Gunthorpe Signed-off-by: Vineet Gupta --- arch/arc/include/asm/atomic64-arcv2.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h index 9b5791b85471..73080a664369 100644 --- a/arch/arc/include/asm/atomic64-arcv2.h +++ b/arch/arc/include/asm/atomic64-arcv2.h @@ -137,12 +137,9 @@ ATOMIC64_OPS(xor, xor, xor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline s64 -arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) +static inline u64 __arch_cmpxchg64_relaxed(volatile void *ptr, u64 old, u64 new) { - s64 prev; - - smp_mb(); + u64 prev; __asm__ __volatile__( "1: llockd %0, [%1] \n" @@ -152,14 +149,12 @@ arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) " bnz 1b \n" "2: \n" : "=&r"(prev) - : "r"(ptr), "ir"(expected), "r"(new) - : "cc"); /* memory clobber comes from smp_mb() */ - - smp_mb(); + : "r"(ptr), "ir"(old), "r"(new) + : "memory", "cc"); return prev; } -#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg +#define arch_cmpxchg64_relaxed __arch_cmpxchg64_relaxed static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) { From 857f4517965b282234e12f6bca0c21ef10eec09b Mon Sep 17 00:00:00 2001 From: Yu-Chun Lin Date: Thu, 10 Apr 2025 01:11:16 +0800 Subject: [PATCH 079/497] ARC: unwind: Use built-in sort swap to reduce code size and improve performance The custom swap function used in sort() was identical to the default built-in sort swap. Remove the custom swap function and passes NULL to sort(), allowing it to use the default swap function. This change reduces code size and improves performance, particularly when CONFIG_MITIGATION_RETPOLINE is enabled. With RETPOLINE mitigation, indirect function calls incur significant overhead, and using the default swap function avoids this cost. $ ./scripts/bloat-o-meter ./unwind.o.old ./unwind.o.new add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-22 (-22) Function old new delta init_unwind_hdr.constprop 544 540 -4 swap_eh_frame_hdr_table_entries 18 - -18 Total: Before=4410, After=4388, chg -0.50% Signed-off-by: Yu-Chun Lin Signed-off-by: Vineet Gupta --- arch/arc/kernel/unwind.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index d8969dab12d4..789cfb9ea14e 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -241,15 +241,6 @@ static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) return (e1->start > e2->start) - (e1->start < e2->start); } -static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) -{ - struct eh_frame_hdr_table_entry *e1 = p1; - struct eh_frame_hdr_table_entry *e2 = p2; - - swap(e1->start, e2->start); - swap(e1->fde, e2->fde); -} - static void init_unwind_hdr(struct unwind_table *table, void *(*alloc) (unsigned long)) { @@ -345,7 +336,7 @@ static void init_unwind_hdr(struct unwind_table *table, sort(header->table, n, sizeof(*header->table), - cmp_eh_frame_hdr_table_entries, swap_eh_frame_hdr_table_entries); + cmp_eh_frame_hdr_table_entries, NULL); table->hdrsz = hdrSize; smp_wmb(); From 2cb74be378675c860af0fcaf1ec2801beebdf028 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:35 +0100 Subject: [PATCH 080/497] ARC: Replace __ASSEMBLY__ with __ASSEMBLER__ in uapi headers __ASSEMBLY__ is only defined by the Makefile of the kernel, so this is not really useful for uapi headers (unless the userspace Makefile defines it, too). Let's switch to __ASSEMBLER__ which gets set automatically by the compiler when compiling assembly code. Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Thomas Huth Signed-off-by: Vineet Gupta --- arch/arc/include/uapi/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 2a6eff57f6dd..3ae832db278c 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -14,7 +14,7 @@ #define PTRACE_GET_THREAD_AREA 25 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Userspace ABI: Register state needed by * -ptrace (gdbserver) @@ -53,6 +53,6 @@ struct user_regs_arcv2 { unsigned long r30, r58, r59; }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* _UAPI__ASM_ARC_PTRACE_H */ From 179e949719fe81219a3e23f1e716ac2d02eea845 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 14 Mar 2025 08:09:36 +0100 Subject: [PATCH 081/497] ARC: Replace __ASSEMBLY__ with __ASSEMBLER__ in the non-uapi headers While the GCC and Clang compilers already define __ASSEMBLER__ automatically when compiling assembly code, __ASSEMBLY__ is a macro that only gets defined by the Makefiles in the kernel. This can be very confusing when switching between userspace and kernelspace coding, or when dealing with uapi headers that rather should use __ASSEMBLER__ instead. So let's standardize on the __ASSEMBLER__ macro that is provided by the compilers now. This is a completely mechanical patch (done with a simple "sed -i" statement). Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Thomas Huth Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/include/asm/atomic.h | 4 ++-- arch/arc/include/asm/bitops.h | 4 ++-- arch/arc/include/asm/bug.h | 4 ++-- arch/arc/include/asm/cache.h | 4 ++-- arch/arc/include/asm/current.h | 4 ++-- arch/arc/include/asm/dsp-impl.h | 2 +- arch/arc/include/asm/dsp.h | 4 ++-- arch/arc/include/asm/dwarf.h | 4 ++-- arch/arc/include/asm/entry.h | 4 ++-- arch/arc/include/asm/irqflags-arcv2.h | 4 ++-- arch/arc/include/asm/irqflags-compact.h | 4 ++-- arch/arc/include/asm/jump_label.h | 4 ++-- arch/arc/include/asm/linkage.h | 6 +++--- arch/arc/include/asm/mmu-arcv2.h | 4 ++-- arch/arc/include/asm/mmu.h | 2 +- arch/arc/include/asm/page.h | 4 ++-- arch/arc/include/asm/pgtable-bits-arcv2.h | 4 ++-- arch/arc/include/asm/pgtable-levels.h | 4 ++-- arch/arc/include/asm/pgtable.h | 4 ++-- arch/arc/include/asm/processor.h | 4 ++-- arch/arc/include/asm/ptrace.h | 4 ++-- arch/arc/include/asm/switch_to.h | 2 +- arch/arc/include/asm/thread_info.h | 4 ++-- 24 files changed, 45 insertions(+), 45 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 005d9e4d187a..a31bbf5c8bbc 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -144,7 +144,7 @@ #define ARC_AUX_AGU_MOD2 0x5E2 #define ARC_AUX_AGU_MOD3 0x5E3 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 592d7fffc223..e615c42b93ba 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_ATOMIC_H #define _ASM_ARC_ATOMIC_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -31,6 +31,6 @@ #include #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index f5a936496f06..5340c2871392 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -10,7 +10,7 @@ #error only can be included directly #endif -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -192,6 +192,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index 4c453ba96c51..171c16021f70 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_BUG_H #define _ASM_ARC_BUG_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -29,6 +29,6 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index f0f1fc5d62b6..040a97f4dd82 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -23,7 +23,7 @@ */ #define ARC_UNCACHED_ADDR_SPACE 0xc0000000 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -65,7 +65,7 @@ extern int ioc_enable; extern unsigned long perip_base, perip_end; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* Instruction cache related Auxiliary registers */ #define ARC_REG_IC_BCR 0x77 /* Build Config reg */ diff --git a/arch/arc/include/asm/current.h b/arch/arc/include/asm/current.h index 06be89f6f2f0..03ffd005f3fa 100644 --- a/arch/arc/include/asm/current.h +++ b/arch/arc/include/asm/current.h @@ -9,7 +9,7 @@ #ifndef _ASM_ARC_CURRENT_H #define _ASM_ARC_CURRENT_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #ifdef CONFIG_ARC_CURR_IN_REG @@ -20,6 +20,6 @@ register struct task_struct *curr_arc asm("gp"); #include #endif /* ! CONFIG_ARC_CURR_IN_REG */ -#endif /* ! __ASSEMBLY__ */ +#endif /* ! __ASSEMBLER__ */ #endif /* _ASM_ARC_CURRENT_H */ diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h index cd5636dfeb6f..fd5fdaad90c1 100644 --- a/arch/arc/include/asm/dsp-impl.h +++ b/arch/arc/include/asm/dsp-impl.h @@ -11,7 +11,7 @@ #define DSP_CTRL_DISABLED_ALL 0 -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ /* clobbers r5 register */ .macro DSP_EARLY_INIT diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h index f496dbc4640b..eeaaf4e4eabd 100644 --- a/arch/arc/include/asm/dsp.h +++ b/arch/arc/include/asm/dsp.h @@ -7,7 +7,7 @@ #ifndef __ASM_ARC_DSP_H #define __ASM_ARC_DSP_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * DSP-related saved registers - need to be saved only when you are @@ -24,6 +24,6 @@ struct dsp_callee_regs { #endif }; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_ARC_DSP_H */ diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h index a0d5ebe1bc3f..1524c5cf8b59 100644 --- a/arch/arc/include/asm/dwarf.h +++ b/arch/arc/include/asm/dwarf.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_DWARF_H #define _ASM_ARC_DWARF_H -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef ARC_DW2_UNWIND_AS_CFI @@ -38,6 +38,6 @@ #endif /* !ARC_DW2_UNWIND_AS_CFI */ -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ASM_ARC_DWARF_H */ diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index 38c35722cebf..f453af251a1a 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -13,7 +13,7 @@ #include /* For VMALLOC_START */ #include -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ #ifdef CONFIG_ISA_ARCOMPACT #include /* ISA specific bits */ @@ -146,7 +146,7 @@ #endif /* CONFIG_ARC_CURR_IN_REG */ -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ extern void do_signal(struct pt_regs *); extern void do_notify_resume(struct pt_regs *); diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index fb3c21f1a238..30aea562f8aa 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -50,7 +50,7 @@ #define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | __AD_ENB | \ (ARCV2_IRQ_DEF_PRIO << 1)) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * Save IRQ state and disable IRQs @@ -170,6 +170,6 @@ static inline void arc_softirq_clear(int irq) seti .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index 936a2f21f315..85c2f6bcde0c 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -40,7 +40,7 @@ #define ISA_INIT_STATUS_BITS STATUS_IE_MASK -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /****************************************************************** * IRQ Control Macros @@ -196,6 +196,6 @@ static inline int arch_irqs_disabled(void) flag \scratch .endm -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/jump_label.h b/arch/arc/include/asm/jump_label.h index a339223d9e05..66ead75784d9 100644 --- a/arch/arc/include/asm/jump_label.h +++ b/arch/arc/include/asm/jump_label.h @@ -2,7 +2,7 @@ #ifndef _ASM_ARC_JUMP_LABEL_H #define _ASM_ARC_JUMP_LABEL_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -68,5 +68,5 @@ struct jump_entry { jump_label_t key; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index 8a3fb71e9cfa..ba3cb65b5eaa 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -12,7 +12,7 @@ #define __ALIGN .align 4 #define __ALIGN_STR __stringify(__ALIGN) -#ifdef __ASSEMBLY__ +#ifdef __ASSEMBLER__ .macro ST2 e, o, off #ifdef CONFIG_ARC_HAS_LL64 @@ -61,7 +61,7 @@ CFI_ENDPROC ASM_NL \ .size name, .-name -#else /* !__ASSEMBLY__ */ +#else /* !__ASSEMBLER__ */ #ifdef CONFIG_ARC_HAS_ICCM #define __arcfp_code __section(".text.arcfp") @@ -75,6 +75,6 @@ #define __arcfp_data __section(".data") #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h index 41412642f279..5e5482026ac9 100644 --- a/arch/arc/include/asm/mmu-arcv2.h +++ b/arch/arc/include/asm/mmu-arcv2.h @@ -69,7 +69,7 @@ #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ struct mm_struct; extern int pae40_exist_but_not_enab(void); @@ -100,6 +100,6 @@ static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) sr \reg, [ARC_REG_PID] .endm -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h index 4ae2db59d494..e3b35ceab582 100644 --- a/arch/arc/include/asm/mmu.h +++ b/arch/arc/include/asm/mmu.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_MMU_H #define _ASM_ARC_MMU_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include /* NR_CPUS */ diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index def0dfb95b43..9720fe6b2c24 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -19,7 +19,7 @@ #endif /* CONFIG_ARC_HAS_PAE40 */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) @@ -136,6 +136,6 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #include /* page_to_pfn, pfn_to_page */ #include -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index 8ebec1b21d24..a8fdb4fe1fbd 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -75,7 +75,7 @@ * This is to enable COW mechanism */ /* xwr */ -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) #define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) @@ -142,6 +142,6 @@ PTE_BIT_FUNC(swp_clear_exclusive, &= ~(_PAGE_SWP_EXCLUSIVE)); #include #endif -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h index d1ce4b0f1071..c8f9273372c0 100644 --- a/arch/arc/include/asm/pgtable-levels.h +++ b/arch/arc/include/asm/pgtable-levels.h @@ -85,7 +85,7 @@ #define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #if CONFIG_PGTABLE_LEVELS > 3 #include @@ -181,6 +181,6 @@ #define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) #endif -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 4cf45a99fd79..bd580e2b62d7 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -19,7 +19,7 @@ */ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) @@ -29,6 +29,6 @@ extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); /* to cope with aliasing VIPT cache */ #define HAVE_ARCH_UNMAPPED_AREA -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d606658e2fe7..7f7901ac6643 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -11,7 +11,7 @@ #ifndef __ASM_ARC_PROCESSOR_H #define __ASM_ARC_PROCESSOR_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include @@ -66,7 +66,7 @@ extern void start_thread(struct pt_regs * regs, unsigned long pc, extern unsigned int __get_wchan(struct task_struct *p); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * Default System Memory Map on ARC diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index cf79df0b2570..f6c052af8f4d 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -10,7 +10,7 @@ #include #include -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ typedef union { struct { @@ -172,6 +172,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, extern int syscall_trace_enter(struct pt_regs *); extern void syscall_trace_exit(struct pt_regs *); -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ #endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h index 1f85de8288b1..5806106a65f9 100644 --- a/arch/arc/include/asm/switch_to.h +++ b/arch/arc/include/asm/switch_to.h @@ -6,7 +6,7 @@ #ifndef _ASM_ARC_SWITCH_TO_H #define _ASM_ARC_SWITCH_TO_H -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include #include diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 12daaf3a61ea..255d2c774219 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -24,7 +24,7 @@ #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_SHIFT (PAGE_SHIFT << THREAD_SIZE_ORDER) -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ #include @@ -62,7 +62,7 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void) return (struct thread_info *)(sp & ~(THREAD_SIZE - 1)); } -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLER__ */ /* * thread information flags From 5d319f75ccf7f0927425a7545aa1a22b3eedc189 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 14 May 2025 09:33:32 -0700 Subject: [PATCH 082/497] drm/msm: Fix a fence leak in submit error path In error paths, we could unref the submit without calling drm_sched_entity_push_job(), so msm_job_free() will never get called. Since drm_sched_job_cleanup() will NULL out the s_fence, we can use that to detect this case. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/653584/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3e9aa2cc38ef..b2aeaecaa39b 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -85,6 +85,15 @@ void __msm_gem_submit_destroy(struct kref *kref) container_of(kref, struct msm_gem_submit, ref); unsigned i; + /* + * In error paths, we could unref the submit without calling + * drm_sched_entity_push_job(), so msm_job_free() will never + * get called. Since drm_sched_job_cleanup() will NULL out + * s_fence, we can use that to detect this case. + */ + if (submit->base.s_fence) + drm_sched_job_cleanup(&submit->base); + if (submit->fence_id) { spin_lock(&submit->queue->idr_lock); idr_remove(&submit->queue->fence_idr, submit->fence_id); From f681c2aa8676a890eacc84044717ab0fd26e058f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 14 May 2025 09:33:33 -0700 Subject: [PATCH 083/497] drm/msm: Fix another leak in the submit error path put_unused_fd() doesn't free the installed file, if we've already done fd_install(). So we need to also free the sync_file. Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/653583/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index b2aeaecaa39b..d4f71bb54e84 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -658,6 +658,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_ringbuffer *ring; struct msm_submit_post_dep *post_deps = NULL; struct drm_syncobj **syncobjs_to_reset = NULL; + struct sync_file *sync_file = NULL; int out_fence_fd = -1; unsigned i; int ret; @@ -867,7 +868,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, } if (ret == 0 && args->flags & MSM_SUBMIT_FENCE_FD_OUT) { - struct sync_file *sync_file = sync_file_create(submit->user_fence); + sync_file = sync_file_create(submit->user_fence); if (!sync_file) { ret = -ENOMEM; } else { @@ -901,8 +902,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&queue->lock); out_post_unlock: - if (ret && (out_fence_fd >= 0)) + if (ret && (out_fence_fd >= 0)) { put_unused_fd(out_fence_fd); + if (sync_file) + fput(sync_file->file); + } if (!IS_ERR_OR_NULL(submit)) { msm_gem_submit_put(submit); From 0c5fea1eb0dc234cd321bcd19a6bd0f51273b4c6 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 20 May 2025 15:08:57 -0400 Subject: [PATCH 084/497] drm/msm: Don't use a worker to capture fault devcoredump Now that we use a threaded IRQ, it should be safe to do this in the fault handler. We can also remove fault_info from struct msm_gpu and just pass it directly. Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/654889/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 22 ++++++++-------------- drivers/gpu/drm/msm/msm_gpu.c | 20 +++++++++----------- drivers/gpu/drm/msm/msm_gpu.h | 8 ++------ 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 2348ffb35f7e..5ebd5a6ea143 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -270,14 +270,6 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, const char *type = "UNKNOWN"; bool do_devcoredump = info && !READ_ONCE(gpu->crashstate); - /* - * If we aren't going to be resuming later from fault_worker, then do - * it now. - */ - if (!do_devcoredump) { - gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); - } - /* * Print a default message if we couldn't get the data from the * adreno-smmu-priv @@ -304,16 +296,18 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, scratch[0], scratch[1], scratch[2], scratch[3]); if (do_devcoredump) { + struct msm_gpu_fault_info fault_info = {}; + /* Turn off the hangcheck timer to keep it from bothering us */ timer_delete(&gpu->hangcheck_timer); - gpu->fault_info.ttbr0 = info->ttbr0; - gpu->fault_info.iova = iova; - gpu->fault_info.flags = flags; - gpu->fault_info.type = type; - gpu->fault_info.block = block; + fault_info.ttbr0 = info->ttbr0; + fault_info.iova = iova; + fault_info.flags = flags; + fault_info.type = type; + fault_info.block = block; - kthread_queue_work(gpu->worker, &gpu->fault_work); + msm_gpu_fault_crashstate_capture(gpu, &fault_info); } return 0; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index c380d9d9f5af..457f019d507e 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -257,7 +257,8 @@ static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state, } static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, - struct msm_gem_submit *submit, char *comm, char *cmd) + struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info, + char *comm, char *cmd) { struct msm_gpu_state *state; @@ -276,7 +277,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, /* Fill in the additional crash state information */ state->comm = kstrdup(comm, GFP_KERNEL); state->cmd = kstrdup(cmd, GFP_KERNEL); - state->fault_info = gpu->fault_info; + if (fault_info) + state->fault_info = *fault_info; if (submit) { int i; @@ -308,7 +310,8 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, } #else static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, - struct msm_gem_submit *submit, char *comm, char *cmd) + struct msm_gem_submit *submit, struct msm_gpu_fault_info *fault_info, + char *comm, char *cmd) { } #endif @@ -405,7 +408,7 @@ static void recover_worker(struct kthread_work *work) /* Record the crash state */ pm_runtime_get_sync(&gpu->pdev->dev); - msm_gpu_crashstate_capture(gpu, submit, comm, cmd); + msm_gpu_crashstate_capture(gpu, submit, NULL, comm, cmd); kfree(cmd); kfree(comm); @@ -459,9 +462,8 @@ static void recover_worker(struct kthread_work *work) msm_gpu_retire(gpu); } -static void fault_worker(struct kthread_work *work) +void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info) { - struct msm_gpu *gpu = container_of(work, struct msm_gpu, fault_work); struct msm_gem_submit *submit; struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); char *comm = NULL, *cmd = NULL; @@ -484,16 +486,13 @@ static void fault_worker(struct kthread_work *work) /* Record the crash state */ pm_runtime_get_sync(&gpu->pdev->dev); - msm_gpu_crashstate_capture(gpu, submit, comm, cmd); + msm_gpu_crashstate_capture(gpu, submit, fault_info, comm, cmd); pm_runtime_put_sync(&gpu->pdev->dev); kfree(cmd); kfree(comm); resume_smmu: - memset(&gpu->fault_info, 0, sizeof(gpu->fault_info)); - gpu->aspace->mmu->funcs->resume_translation(gpu->aspace->mmu); - mutex_unlock(&gpu->lock); } @@ -882,7 +881,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, init_waitqueue_head(&gpu->retire_event); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); - kthread_init_work(&gpu->fault_work, fault_worker); priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index e25009150579..bed0692f5adb 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -253,12 +253,6 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3 struct timer_list hangcheck_timer; - /* Fault info for most recent iova fault: */ - struct msm_gpu_fault_info fault_info; - - /* work for handling GPU ioval faults: */ - struct kthread_work fault_work; - /* work for handling GPU recovery: */ struct kthread_work recover_work; @@ -705,6 +699,8 @@ static inline void msm_gpu_crashstate_put(struct msm_gpu *gpu) mutex_unlock(&gpu->lock); } +void msm_gpu_fault_crashstate_capture(struct msm_gpu *gpu, struct msm_gpu_fault_info *fault_info); + /* * Simple macro to semi-cleanly add the MAP_PRIV flag for targets that can * support expanded privileges From dedf404be8cf97e6fabbed7ad97000ab816897eb Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 20 May 2025 15:08:58 -0400 Subject: [PATCH 085/497] drm/msm: Delete resume_translation() Unused since the previous commit. Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/654890/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a2xx_gpummu.c | 5 ----- drivers/gpu/drm/msm/msm_iommu.c | 13 ------------- drivers/gpu/drm/msm/msm_mmu.h | 1 - 3 files changed, 19 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c index 39641551eeb6..4280f71e472a 100644 --- a/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c +++ b/drivers/gpu/drm/msm/adreno/a2xx_gpummu.c @@ -71,10 +71,6 @@ static int a2xx_gpummu_unmap(struct msm_mmu *mmu, uint64_t iova, size_t len) return 0; } -static void a2xx_gpummu_resume_translation(struct msm_mmu *mmu) -{ -} - static void a2xx_gpummu_destroy(struct msm_mmu *mmu) { struct a2xx_gpummu *gpummu = to_a2xx_gpummu(mmu); @@ -90,7 +86,6 @@ static const struct msm_mmu_funcs funcs = { .map = a2xx_gpummu_map, .unmap = a2xx_gpummu_unmap, .destroy = a2xx_gpummu_destroy, - .resume_translation = a2xx_gpummu_resume_translation, }; struct msm_mmu *a2xx_gpummu_new(struct device *dev, struct msm_gpu *gpu) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index fd73dcd3f30e..aae885d048d0 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -345,7 +345,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev unsigned long iova, int flags, void *arg) { struct msm_iommu *iommu = arg; - struct msm_mmu *mmu = &iommu->base; struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(iommu->base.dev); struct adreno_smmu_fault_info info, *ptr = NULL; @@ -359,9 +358,6 @@ static int msm_gpu_fault_handler(struct iommu_domain *domain, struct device *dev pr_warn_ratelimited("*** fault: iova=%16lx, flags=%d\n", iova, flags); - if (mmu->funcs->resume_translation) - mmu->funcs->resume_translation(mmu); - return 0; } @@ -376,14 +372,6 @@ static int msm_disp_fault_handler(struct iommu_domain *domain, struct device *de return -ENOSYS; } -static void msm_iommu_resume_translation(struct msm_mmu *mmu) -{ - struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev); - - if (adreno_smmu->resume_translation) - adreno_smmu->resume_translation(adreno_smmu->cookie, true); -} - static void msm_iommu_detach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); @@ -431,7 +419,6 @@ static const struct msm_mmu_funcs funcs = { .map = msm_iommu_map, .unmap = msm_iommu_unmap, .destroy = msm_iommu_destroy, - .resume_translation = msm_iommu_resume_translation, }; struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index daf91529e02b..c3d17aae88b0 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -15,7 +15,6 @@ struct msm_mmu_funcs { size_t len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len); void (*destroy)(struct msm_mmu *mmu); - void (*resume_translation)(struct msm_mmu *mmu); }; enum msm_mmu_type { From b13044092c1e30453d2f7e9be596d3a2616582a0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 20 May 2025 15:08:59 -0400 Subject: [PATCH 086/497] drm/msm: Temporarily disable stall-on-fault after a page fault When things go wrong, the GPU is capable of quickly generating millions of faulting translation requests per second. When that happens, in the stall-on-fault model each access will stall until it wins the race to signal the fault and then the RESUME register is written. This slows processing page faults to a crawl as the GPU can generate faults much faster than the CPU can acknowledge them. It also means that all available resources in the SMMU are saturated waiting for the stalled transactions, so that other transactions such as transactions generated by the GMU, which shares translation resources with the GPU, cannot proceed. This causes a GMU watchdog timeout, which leads to a failed reset because GX cannot collapse when there is a transaction pending and a permanently hung GPU. On older platforms with qcom,smmu-v2, it seems that when one transaction is stalled subsequent faulting transactions are terminated, which avoids this problem, but the MMU-500 follows the spec here. To work around these problems, disable stall-on-fault as soon as we get a page fault until a cooldown period after pagefaults stop. This allows the GMU some guaranteed time to continue working. We only use stall-on-fault to halt the GPU while we collect a devcoredump and we always terminate the transaction afterward, so it's fine to miss some subsequent page faults. We also keep it disabled so long as the current devcoredump hasn't been deleted, because in that case we likely won't capture another one if there's a fault. After this commit HFI messages still occasionally time out, because the crashdump handler doesn't run fast enough to let the GMU resume, but the driver seems to recover from it. This will probably go away after the HFI timeout is increased. Signed-off-by: Connor Abbott Reviewed-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/654891/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 ++ drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 +++ drivers/gpu/drm/msm/adreno/adreno_gpu.c | 40 ++++++++++++++++++++++++- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 ++ drivers/gpu/drm/msm/msm_debugfs.c | 32 ++++++++++++++++++++ drivers/gpu/drm/msm/msm_drv.c | 4 +++ drivers/gpu/drm/msm/msm_drv.h | 23 ++++++++++++++ drivers/gpu/drm/msm/msm_iommu.c | 9 ++++++ drivers/gpu/drm/msm/msm_mmu.h | 1 + 9 files changed, 116 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 650e5bac225f..60aef0796236 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -131,6 +131,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) { ring->cur_ctx_seqno = 0; a5xx_submit_in_rb(gpu, submit); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index bf3758f010f4..a7ffd92de3bb 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -212,6 +212,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + a6xx_set_pagetable(a6xx_gpu, ring, submit); get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP(0), @@ -335,6 +337,8 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + adreno_check_and_reenable_stall(adreno_gpu); + /* * Toggle concurrent binning for pagetable switch and set the thread to * BR since only it can execute the pagetable switch packets. diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 5ebd5a6ea143..86bff915c3e7 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -259,16 +259,54 @@ u64 adreno_private_address_space_size(struct msm_gpu *gpu) return BIT(ttbr1_cfg->ias) - ADRENO_VM_START; } +void adreno_check_and_reenable_stall(struct adreno_gpu *adreno_gpu) +{ + struct msm_gpu *gpu = &adreno_gpu->base; + struct msm_drm_private *priv = gpu->dev->dev_private; + unsigned long flags; + + /* + * Wait until the cooldown period has passed and we would actually + * collect a crashdump to re-enable stall-on-fault. + */ + spin_lock_irqsave(&priv->fault_stall_lock, flags); + if (!priv->stall_enabled && + ktime_after(ktime_get(), priv->stall_reenable_time) && + !READ_ONCE(gpu->crashstate)) { + priv->stall_enabled = true; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, true); + } + spin_unlock_irqrestore(&priv->fault_stall_lock, flags); +} + #define ARM_SMMU_FSR_TF BIT(1) #define ARM_SMMU_FSR_PF BIT(3) #define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_SS BIT(30) int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]) { + struct msm_drm_private *priv = gpu->dev->dev_private; const char *type = "UNKNOWN"; - bool do_devcoredump = info && !READ_ONCE(gpu->crashstate); + bool do_devcoredump = info && (info->fsr & ARM_SMMU_FSR_SS) && + !READ_ONCE(gpu->crashstate); + unsigned long irq_flags; + + /* + * In case there is a subsequent storm of pagefaults, disable + * stall-on-fault for at least half a second. + */ + spin_lock_irqsave(&priv->fault_stall_lock, irq_flags); + if (priv->stall_enabled) { + priv->stall_enabled = false; + + gpu->aspace->mmu->funcs->set_stall(gpu->aspace->mmu, false); + } + priv->stall_reenable_time = ktime_add_ms(ktime_get(), 500); + spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags); /* * Print a default message if we couldn't get the data from the diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index a8f4bf416e64..bc063594a359 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -636,6 +636,8 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, struct adreno_smmu_fault_info *info, const char *block, u32 scratch[4]); +void adreno_check_and_reenable_stall(struct adreno_gpu *gpu); + int adreno_read_speedbin(struct device *dev, u32 *speedbin); /* diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index 7ab607252d18..6af72162cda4 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -208,6 +208,35 @@ DEFINE_DEBUGFS_ATTRIBUTE(shrink_fops, shrink_get, shrink_set, "0x%08llx\n"); +/* + * Return the number of microseconds to wait until stall-on-fault is + * re-enabled. If 0 then it is already enabled or will be re-enabled on the + * next submit (unless there's a leftover devcoredump). This is useful for + * kernel tests that intentionally produce a fault and check the devcoredump to + * wait until the cooldown period is over. + */ + +static int +stall_reenable_time_get(void *data, u64 *val) +{ + struct msm_drm_private *priv = data; + unsigned long irq_flags; + + spin_lock_irqsave(&priv->fault_stall_lock, irq_flags); + + if (priv->stall_enabled) + *val = 0; + else + *val = max(ktime_us_delta(priv->stall_reenable_time, ktime_get()), 0); + + spin_unlock_irqrestore(&priv->fault_stall_lock, irq_flags); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(stall_reenable_time_fops, + stall_reenable_time_get, NULL, + "%lld\n"); static int msm_gem_show(struct seq_file *m, void *arg) { @@ -319,6 +348,9 @@ static void msm_debugfs_gpu_init(struct drm_minor *minor) debugfs_create_bool("disable_err_irq", 0600, minor->debugfs_root, &priv->disable_err_irq); + debugfs_create_file("stall_reenable_time_us", 0400, minor->debugfs_root, + priv, &stall_reenable_time_fops); + gpu_devfreq = debugfs_create_dir("devfreq", minor->debugfs_root); debugfs_create_bool("idle_clamp",0600, gpu_devfreq, diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index f316e6776f67..132d4cac3587 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -245,6 +245,10 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) drm_gem_lru_init(&priv->lru.willneed, &priv->lru.lock); drm_gem_lru_init(&priv->lru.dontneed, &priv->lru.lock); + /* Initialize stall-on-fault */ + spin_lock_init(&priv->fault_stall_lock); + priv->stall_enabled = true; + /* Teach lockdep about lock ordering wrt. shrinker: */ fs_reclaim_acquire(GFP_KERNEL); might_lock(&priv->lru.lock); diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index a65077855201..c8afb1ea6040 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -222,6 +222,29 @@ struct msm_drm_private { * the sw hangcheck mechanism. */ bool disable_err_irq; + + /** + * @fault_stall_lock: + * + * Serialize changes to stall-on-fault state. + */ + spinlock_t fault_stall_lock; + + /** + * @fault_stall_reenable_time: + * + * If stall_enabled is false, when to reenable stall-on-fault. + * Protected by @fault_stall_lock. + */ + ktime_t stall_reenable_time; + + /** + * @stall_enabled: + * + * Whether stall-on-fault is currently enabled. Protected by + * @fault_stall_lock. + */ + bool stall_enabled; }; const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier); diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index aae885d048d0..739ce2c283a4 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -372,6 +372,14 @@ static int msm_disp_fault_handler(struct iommu_domain *domain, struct device *de return -ENOSYS; } +static void msm_iommu_set_stall(struct msm_mmu *mmu, bool enable) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(mmu->dev); + + if (adreno_smmu->set_stall) + adreno_smmu->set_stall(adreno_smmu->cookie, enable); +} + static void msm_iommu_detach(struct msm_mmu *mmu) { struct msm_iommu *iommu = to_msm_iommu(mmu); @@ -419,6 +427,7 @@ static const struct msm_mmu_funcs funcs = { .map = msm_iommu_map, .unmap = msm_iommu_unmap, .destroy = msm_iommu_destroy, + .set_stall = msm_iommu_set_stall, }; struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h index c3d17aae88b0..0c694907140d 100644 --- a/drivers/gpu/drm/msm/msm_mmu.h +++ b/drivers/gpu/drm/msm/msm_mmu.h @@ -15,6 +15,7 @@ struct msm_mmu_funcs { size_t len, int prot); int (*unmap)(struct msm_mmu *mmu, uint64_t iova, size_t len); void (*destroy)(struct msm_mmu *mmu); + void (*set_stall)(struct msm_mmu *mmu, bool enable); }; enum msm_mmu_type { From ad0f54842cd23127070d8c310dd06d8f7add025c Mon Sep 17 00:00:00 2001 From: Ankit Chauhan Date: Wed, 28 May 2025 16:36:04 +0530 Subject: [PATCH 087/497] scsi: mvsas: Fix typos in per-phy comments and SAS cmd port registers Spelling fixes: Deocder --> Decoder Memroy --> Memory This is a non-functional change aimed at improving code clarity. Signed-off-by: Ankit Chauhan Link: https://lore.kernel.org/r/20250528110604.59528-1-ankitchauhan2065@gmail.com Signed-off-by: Martin K. Petersen --- drivers/scsi/mvsas/mv_defs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h index 8ef174cd4d37..3e4124177b2a 100644 --- a/drivers/scsi/mvsas/mv_defs.h +++ b/drivers/scsi/mvsas/mv_defs.h @@ -215,7 +215,7 @@ enum hw_register_bits { /* MVS_Px_INT_STAT, MVS_Px_INT_MASK (per-phy events) */ PHYEV_DEC_ERR = (1U << 24), /* Phy Decoding Error */ - PHYEV_DCDR_ERR = (1U << 23), /* STP Deocder Error */ + PHYEV_DCDR_ERR = (1U << 23), /* STP Decoder Error */ PHYEV_CRC_ERR = (1U << 22), /* STP CRC Error */ PHYEV_UNASSOC_FIS = (1U << 19), /* unassociated FIS rx'd */ PHYEV_AN = (1U << 18), /* SATA async notification */ @@ -347,7 +347,7 @@ enum sas_cmd_port_registers { CMD_SATA_PORT_MEM_CTL0 = 0x158, /* SATA Port Memory Control 0 */ CMD_SATA_PORT_MEM_CTL1 = 0x15c, /* SATA Port Memory Control 1 */ CMD_XOR_MEM_BIST_CTL = 0x160, /* XOR Memory BIST Control */ - CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memroy BIST Status */ + CMD_XOR_MEM_BIST_STAT = 0x164, /* XOR Memory BIST Status */ CMD_DMA_MEM_BIST_CTL = 0x168, /* DMA Memory BIST Control */ CMD_DMA_MEM_BIST_STAT = 0x16c, /* DMA Memory BIST Status */ CMD_PORT_MEM_BIST_CTL = 0x170, /* Port Memory BIST Control */ From 9b17621366d210ffee83262a8754086ebbde5e55 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 30 May 2025 12:29:35 -0700 Subject: [PATCH 088/497] scsi: iscsi: Fix incorrect error path labels for flashnode operations Correct the error handling goto labels used when host lookup fails in various flashnode-related event handlers: - iscsi_new_flashnode() - iscsi_del_flashnode() - iscsi_login_flashnode() - iscsi_logout_flashnode() - iscsi_logout_flashnode_sid() scsi_host_put() is not required when shost is NULL, so jumping to the correct label avoids unnecessary operations. These functions previously jumped to the wrong goto label (put_host), which did not match the intended cleanup logic. Use the correct exit labels (exit_new_fnode, exit_del_fnode, etc.) to ensure proper error handling. Also remove the unused put_host label under iscsi_new_flashnode() as it is no longer needed. No functional changes beyond accurate error path correction. Fixes: c6a4bb2ef596 ("[SCSI] scsi_transport_iscsi: Add flash node mgmt support") Signed-off-by: Alok Tiwari Link: https://lore.kernel.org/r/20250530193012.3312911-1-alok.a.tiwari@oracle.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0b8c91bf793f..c75a806496d6 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -3499,7 +3499,7 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.new_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_new_fnode; } index = transport->new_flashnode(shost, data, len); @@ -3509,7 +3509,6 @@ static int iscsi_new_flashnode(struct iscsi_transport *transport, else err = -EIO; -put_host: scsi_host_put(shost); exit_new_fnode: @@ -3534,7 +3533,7 @@ static int iscsi_del_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.del_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_del_fnode; } idx = ev->u.del_flashnode.flashnode_idx; @@ -3576,7 +3575,7 @@ static int iscsi_login_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.login_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_login_fnode; } idx = ev->u.login_flashnode.flashnode_idx; @@ -3628,7 +3627,7 @@ static int iscsi_logout_flashnode(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_fnode; } idx = ev->u.logout_flashnode.flashnode_idx; @@ -3678,7 +3677,7 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; - goto put_host; + goto exit_logout_sid; } session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid); From 9697ca0d53e3db357be26d2414276143c4a2cd49 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 3 Jun 2025 20:21:56 +0200 Subject: [PATCH 089/497] scsi: s390: zfcp: Ensure synchronous unit_add Improve the usability of the unit_add sysfs attribute by ensuring that the associated FCP LUN scan processing is completed synchronously. This enables configuration tooling to consistently determine the end of the scan process to allow for serialization of follow-on actions. While the scan process associated with unit_add typically completes synchronously, it is deferred to an asynchronous background process if unit_add is used before initial remote port scanning has completed. This occurs when unit_add is used immediately after setting the associated FCP device online. To ensure synchronous unit_add processing, wait for remote port scanning to complete before initiating the FCP LUN scan. Cc: stable@vger.kernel.org Reviewed-by: M Nikhil Reviewed-by: Nihar Panda Signed-off-by: Peter Oberparleiter Signed-off-by: Nihar Panda Link: https://lore.kernel.org/r/20250603182252.2287285-2-niharp@linux.ibm.com Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 41e36af35488..90a84ae98b97 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -449,6 +449,8 @@ static ssize_t zfcp_sysfs_unit_add_store(struct device *dev, if (kstrtoull(buf, 0, (unsigned long long *) &fcp_lun)) return -EINVAL; + flush_work(&port->rport_work); + retval = zfcp_unit_add(port, fcp_lun); if (retval) return retval; From b1c9e797ad37d188675505b66a3a4bbeea5d9560 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 20 May 2025 18:28:05 -0400 Subject: [PATCH 090/497] drm/msm: Fix CP_RESET_CONTEXT_STATE bitfield names Based on kgsl. Fixes: af66706accdf ("drm/msm/a6xx: Add skeleton A7xx support") Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/654922/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 5a6ae9fc3194..462713401622 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2255,7 +2255,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) - + + From 2b520c6104f34e3a548525173c38ebca4402cac3 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 20 May 2025 18:28:06 -0400 Subject: [PATCH 091/497] drm/msm/a7xx: Call CP_RESET_CONTEXT_STATE Calling this packet is necessary when we switch contexts because there are various pieces of state used by userspace to synchronize between BR and BV that are persistent across submits and we need to make sure that they are in a "safe" state when switching contexts. Otherwise a userspace submission in one context could cause another context to function incorrectly and hang, effectively a denial of service (although without leaking data). This was missed during initial a7xx bringup. Fixes: af66706accdf ("drm/msm/a6xx: Add skeleton A7xx support") Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/654924/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index a7ffd92de3bb..491fde0083a2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -130,6 +130,20 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno - 1); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BOTH); + + /* Reset state used to synchronize BR and BV */ + OUT_PKT7(ring, CP_RESET_CONTEXT_STATE, 1); + OUT_RING(ring, + CP_RESET_CONTEXT_STATE_0_CLEAR_ON_CHIP_TS | + CP_RESET_CONTEXT_STATE_0_CLEAR_RESOURCE_TABLE | + CP_RESET_CONTEXT_STATE_0_CLEAR_BV_BR_COUNTER | + CP_RESET_CONTEXT_STATE_0_RESET_GLOBAL_LOCAL_TS); + + OUT_PKT7(ring, CP_THREAD_CONTROL, 1); + OUT_RING(ring, CP_SET_THREAD_BR); } if (!sysprof) { From ba64c6737f8656aa86e9fbcc1ff56e56316e62f6 Mon Sep 17 00:00:00 2001 From: Ryan Eatmon Date: Sat, 24 May 2025 21:25:37 +0530 Subject: [PATCH 092/497] drivers: gpu: drm: msm: registers: improve reproducibility The files generated by gen_header.py capture the source path to the input files and the date. While that can be informative, it varies based on where and when the kernel was built as the full path is captured. Since all of the files that this tool is run on is under the drivers directory, this modifies the application to strip all of the path before drivers. Additionally it prints instead of the date. Signed-off-by: Ryan Eatmon Signed-off-by: Bruce Ashfield Signed-off-by: Viswanath Kraleti Acked-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/655599/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/registers/gen_header.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/registers/gen_header.py b/drivers/gpu/drm/msm/registers/gen_header.py index 3926485bb197..a409404627c7 100644 --- a/drivers/gpu/drm/msm/registers/gen_header.py +++ b/drivers/gpu/drm/msm/registers/gen_header.py @@ -11,6 +11,7 @@ import collections import argparse import time import datetime +import re class Error(Exception): def __init__(self, message): @@ -877,13 +878,14 @@ The rules-ng-ng source files this header was generated from are: """) maxlen = 0 for filepath in p.xml_files: - maxlen = max(maxlen, len(filepath)) + new_filepath = re.sub("^.+drivers","drivers",filepath) + maxlen = max(maxlen, len(new_filepath)) for filepath in p.xml_files: - pad = " " * (maxlen - len(filepath)) + pad = " " * (maxlen - len(new_filepath)) filesize = str(os.path.getsize(filepath)) filesize = " " * (7 - len(filesize)) + filesize filetime = time.ctime(os.path.getmtime(filepath)) - print("- " + filepath + pad + " (" + filesize + " bytes, from " + filetime + ")") + print("- " + new_filepath + pad + " (" + filesize + " bytes, from )") if p.copyright_year: current_year = str(datetime.date.today().year) print() From 1453b532d193a0b1b94429c657365e14dd4f361e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 9 Jun 2025 11:24:35 -0700 Subject: [PATCH 093/497] drm/msm: Rename add_components_mdp() To better match add_gpu_components(). Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/657700/ --- drivers/gpu/drm/msm/msm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 132d4cac3587..8eeb54a367f0 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -930,7 +930,7 @@ static const struct drm_driver msm_driver = { * is no external component that we need to add since LVDS is within MDP4 * itself. */ -static int add_components_mdp(struct device *master_dev, +static int add_mdp_components(struct device *master_dev, struct component_match **matchptr) { struct device_node *np = master_dev->of_node; @@ -1075,7 +1075,7 @@ int msm_drv_probe(struct device *master_dev, /* Add mdp components if we have KMS. */ if (kms_init) { - ret = add_components_mdp(master_dev, &match); + ret = add_mdp_components(master_dev, &match); if (ret) return ret; } From 1efb73791c828f3d1ac68ebbc5eddc35a562f9d0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 9 Jun 2025 11:24:36 -0700 Subject: [PATCH 094/497] drm/msm/adreno: Pass device_node to find_chipid() We are going to want to re-use this before the component is bound, when we don't yet have the device pointer (but we do have the of node). v2: use %pOF Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/657705/ --- drivers/gpu/drm/msm/adreno/adreno_device.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index f5e1490d07c1..f176796e238d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -137,9 +137,8 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) return NULL; } -static int find_chipid(struct device *dev, uint32_t *chipid) +static int find_chipid(struct device_node *node, uint32_t *chipid) { - struct device_node *node = dev->of_node; const char *compat; int ret; @@ -173,11 +172,12 @@ static int find_chipid(struct device *dev, uint32_t *chipid) /* and if that fails, fall back to legacy "qcom,chipid" property: */ ret = of_property_read_u32(node, "qcom,chipid", chipid); if (ret) { - DRM_DEV_ERROR(dev, "could not parse qcom,chipid: %d\n", ret); + DRM_ERROR("%pOF: could not parse qcom,chipid: %d\n", + node, ret); return ret; } - dev_warn(dev, "Using legacy qcom,chipid binding!\n"); + pr_warn("%pOF: Using legacy qcom,chipid binding!\n", node); return 0; } @@ -191,7 +191,7 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) struct msm_gpu *gpu; int ret; - ret = find_chipid(dev, &config.chip_id); + ret = find_chipid(dev->of_node, &config.chip_id); if (ret) return ret; From 93310053663ba647e402ef67e4bb18ec06ff8dc4 Mon Sep 17 00:00:00 2001 From: Philipp Kerling Date: Sun, 8 Jun 2025 20:59:00 +0200 Subject: [PATCH 095/497] smb: client: disable path remapping with POSIX extensions If SMB 3.1.1 POSIX Extensions are available and negotiated, the client should be able to use all characters and not remap anything. Currently, the user has to explicitly request this behavior by specifying the "nomapposix" mount option. Link: https://lore.kernel.org/4195bb677b33d680e77549890a4f4dd3b474ceaf.camel@rx2.rx-server.de Signed-off-by: Philipp Kerling Reviewed-by: Namjae Jeon Signed-off-by: Steve French --- Documentation/admin-guide/cifs/usage.rst | 2 ++ fs/smb/client/connect.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index c09674a75a9e..d989ae5778ba 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -270,6 +270,8 @@ configured for Unix Extensions (and the client has not disabled illegal Windows/NTFS/SMB characters to a remap range (this mount parameter is the default for SMB3). This remap (``mapposix``) range is also compatible with Mac (and "Services for Mac" on some older Windows). +When POSIX Extensions for SMB 3.1.1 are negotiated, remapping is automatically +disabled. CIFS VFS Mount Options ====================== diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 28bc33496623..c4fb80b37738 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -3718,9 +3718,15 @@ int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) goto out; } - /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */ - if (tcon->posix_extensions) + /* + * if new SMB3.11 POSIX extensions are supported, do not change anything in the + * path (i.e., do not remap / and \ and do not map any special characters) + */ + if (tcon->posix_extensions) { cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS; + cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MAP_SFM_CHR | + CIFS_MOUNT_MAP_SPECIAL_CHR); + } #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY /* tell server which Unix caps we support */ From 82ffbe7776d0ac084031f114167712269bf3d832 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Jun 2025 16:51:27 +0000 Subject: [PATCH 096/497] net_sched: sch_sfq: fix a potential crash on gso_skb handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SFQ has an assumption of always being able to queue at least one packet. However, after the blamed commit, sch->q.len can be inflated by packets in sch->gso_skb, and an enqueue() on an empty SFQ qdisc can be followed by an immediate drop. Fix sfq_drop() to properly clear q->tail in this situation. Tested: ip netns add lb ip link add dev to-lb type veth peer name in-lb netns lb ethtool -K to-lb tso off # force qdisc to requeue gso_skb ip netns exec lb ethtool -K in-lb gro on # enable NAPI ip link set dev to-lb up ip -netns lb link set dev in-lb up ip addr add dev to-lb 192.168.20.1/24 ip -netns lb addr add dev in-lb 192.168.20.2/24 tc qdisc replace dev to-lb root sfq limit 100 ip netns exec lb netserver netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & netperf -H 192.168.20.2 -l 100 & Fixes: a53851e2c321 ("net: sched: explicit locking in gso_cpu fallback") Reported-by: Marcus Wichelmann Closes: https://lore.kernel.org/netdev/9da42688-bfaa-4364-8797-e9271f3bdaef@hetzner-cloud.de/ Signed-off-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20250606165127.3629486-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_sfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b912ad99aa15..77fa02f2bfcd 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -310,7 +310,10 @@ static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ x = q->tail->next; slot = &q->slots[x]; - q->tail->next = slot->next; + if (slot->next == x) + q->tail = NULL; /* no more active slots */ + else + q->tail->next = slot->next; q->ht[slot->hash] = SFQ_EMPTY_SLOT; goto drop; } From 0838fc3e6718743d595bf26e0e69ae55aa51fad1 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 9 Jun 2025 11:24:37 -0700 Subject: [PATCH 097/497] drm/msm/adreno: Check for recognized GPU before bind If we have a newer dtb than kernel, we could end up in a situation where the GPU device is present in the dtb, but not in the drivers device table. We don't want this to prevent the display from probing. So check that we recognize the GPU before adding the GPU component. v2: use %pOF Signed-off-by: Rob Clark Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/657701/ --- drivers/gpu/drm/msm/adreno/adreno_device.c | 29 ++++++++++++++++++---- drivers/gpu/drm/msm/msm_drv.c | 2 +- drivers/gpu/drm/msm/msm_gpu.h | 1 + 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index f176796e238d..5e7307567239 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -182,6 +182,26 @@ static int find_chipid(struct device_node *node, uint32_t *chipid) return 0; } +bool adreno_has_gpu(struct device_node *node) +{ + const struct adreno_info *info; + uint32_t chip_id; + int ret; + + ret = find_chipid(node, &chip_id); + if (ret) + return false; + + info = adreno_info(chip_id); + if (!info) { + pr_warn("%pOF: Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n", + node, ADRENO_CHIPID_ARGS(chip_id)); + return false; + } + + return true; +} + static int adreno_bind(struct device *dev, struct device *master, void *data) { static struct adreno_platform_config config = {}; @@ -192,18 +212,17 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) int ret; ret = find_chipid(dev->of_node, &config.chip_id); - if (ret) + /* We shouldn't have gotten this far if we can't parse the chip_id */ + if (WARN_ON(ret)) return ret; dev->platform_data = &config; priv->gpu_pdev = to_platform_device(dev); info = adreno_info(config.chip_id); - if (!info) { - dev_warn(drm->dev, "Unknown GPU revision: %"ADRENO_CHIPID_FMT"\n", - ADRENO_CHIPID_ARGS(config.chip_id)); + /* We shouldn't have gotten this far if we don't recognize the GPU: */ + if (!WARN_ON(info)) return -ENXIO; - } config.info = info; diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 8eeb54a367f0..d007687c2446 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1034,7 +1034,7 @@ static int add_gpu_components(struct device *dev, if (!np) return 0; - if (of_device_is_available(np)) + if (of_device_is_available(np) && adreno_has_gpu(np)) drm_of_component_match_add(dev, matchptr, component_compare_of, np); of_node_put(np); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index bed0692f5adb..5bf7cd985b9c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -662,6 +662,7 @@ msm_gpu_create_private_address_space(struct msm_gpu *gpu, struct task_struct *ta void msm_gpu_cleanup(struct msm_gpu *gpu); struct msm_gpu *adreno_load_gpu(struct drm_device *dev); +bool adreno_has_gpu(struct device_node *node); void __init adreno_register(void); void __exit adreno_unregister(void); From b2f966568faaad326de97481096d0f3dc0971c43 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Fri, 6 Jun 2025 13:57:39 -0700 Subject: [PATCH 098/497] scsi: storvsc: Increase the timeouts to storvsc_timeout Currently storvsc_timeout is only used in storvsc_sdev_configure(), and 5s and 10s are used elsewhere. It turns out that rarely the 5s is not enough on Azure, so let's use storvsc_timeout everywhere. In case a timeout happens and storvsc_channel_init() returns an error, close the VMBus channel so that any host-to-guest messages in the channel's ringbuffer, which might come late, can be safely ignored. Add a "const" to storvsc_timeout. Cc: stable@kernel.org Signed-off-by: Dexuan Cui Link: https://lore.kernel.org/r/1749243459-10419-1-git-send-email-decui@microsoft.com Reviewed-by: Long Li Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 2e6b2412d2c9..d9e59204a9c3 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -362,7 +362,7 @@ MODULE_PARM_DESC(ring_avail_percent_lowater, /* * Timeout in seconds for all devices managed by this driver. */ -static int storvsc_timeout = 180; +static const int storvsc_timeout = 180; #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) static struct scsi_transport_template *fc_transport_template; @@ -768,7 +768,7 @@ static void handle_multichannel_storage(struct hv_device *device, int max_chns) return; } - t = wait_for_completion_timeout(&request->wait_event, 10*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) { dev_err(dev, "Failed to create sub-channel: timed out\n"); return; @@ -833,7 +833,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device, if (ret != 0) return ret; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return -ETIMEDOUT; @@ -1350,6 +1350,8 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size, return ret; ret = storvsc_channel_init(device, is_fc); + if (ret) + vmbus_close(device->channel); return ret; } @@ -1668,7 +1670,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd) if (ret != 0) return FAILED; - t = wait_for_completion_timeout(&request->wait_event, 5*HZ); + t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); if (t == 0) return TIMEOUT_ERROR; From 5c3ba81923e02adae354ec8afd006f93289b4a3c Mon Sep 17 00:00:00 2001 From: Rajashekhar M A Date: Fri, 6 Jun 2025 15:59:24 +0200 Subject: [PATCH 099/497] scsi: error: alua: I/O errors for ALUA state transitions When a host is configured with a few LUNs and I/O is running, injecting FC faults repeatedly leads to path recovery problems. The LUNs have 4 paths each and 3 of them come back active after say an FC fault which makes 2 of the paths go down, instead of all 4. This happens after several iterations of continuous FC faults. Reason here is that we're returning an I/O error whenever we're encountering sense code 06/04/0a (LOGICAL UNIT NOT ACCESSIBLE, ASYMMETRIC ACCESS STATE TRANSITION) instead of retrying. Signed-off-by: Rajashekhar M A Signed-off-by: Hannes Reinecke Link: https://lore.kernel.org/r/20250606135924.27397-1-hare@kernel.org Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 376b8897ab90..746ff6a1f309 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -665,7 +665,8 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) * if the device is in the process of becoming ready, we * should retry. */ - if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) + if ((sshdr.asc == 0x04) && + (sshdr.ascq == 0x01 || sshdr.ascq == 0x0a)) return NEEDS_RETRY; /* * if the device is not started, we need to wake From cd097df4596f3a1e9d75eb8520162de1eb8485b2 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Tue, 10 Jun 2025 07:42:26 +0530 Subject: [PATCH 100/497] powerpc/powernv/memtrace: Fix out of bounds issue in memtrace mmap memtrace mmap issue has an out of bounds issue. This patch fixes the by checking that the requested mapping region size should stay within the allocated region size. Reported-by: Jonathan Greental Fixes: 08a022ad3dfa ("powerpc/powernv/memtrace: Allow mmaping trace buffers") Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250610021227.361980-1-maddy@linux.ibm.com --- arch/powerpc/platforms/powernv/memtrace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index 4ac9808e55a4..2ea30b343354 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -48,11 +48,15 @@ static ssize_t memtrace_read(struct file *filp, char __user *ubuf, static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) { struct memtrace_entry *ent = filp->private_data; + unsigned long ent_nrpages = ent->size >> PAGE_SHIFT; + unsigned long vma_nrpages = vma_pages(vma); - if (ent->size < vma->vm_end - vma->vm_start) + /* The requested page offset should be within object's page count */ + if (vma->vm_pgoff >= ent_nrpages) return -EINVAL; - if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) + /* The requested mapping range should remain within the bounds */ + if (vma_nrpages > ent_nrpages - vma->vm_pgoff) return -EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); From 0d67f0dee6c9176bc09a5482dd7346e3a0f14d0b Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Tue, 10 Jun 2025 07:42:27 +0530 Subject: [PATCH 101/497] powerpc/vas: Return -EINVAL if the offset is non-zero in mmap() The user space calls mmap() to map VAS window paste address and the kernel returns the complete mapped page for each window. So return -EINVAL if non-zero is passed for offset parameter to mmap(). See Documentation/arch/powerpc/vas-api.rst for mmap() restrictions. Co-developed-by: Jonathan Greental Signed-off-by: Jonathan Greental Reported-by: Jonathan Greental Fixes: dda44eb29c23 ("powerpc/vas: Add VAS user space API") Signed-off-by: Haren Myneni Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250610021227.361980-2-maddy@linux.ibm.com --- arch/powerpc/platforms/book3s/vas-api.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c index 0b6365d85d11..dc6f75d3ac6e 100644 --- a/arch/powerpc/platforms/book3s/vas-api.c +++ b/arch/powerpc/platforms/book3s/vas-api.c @@ -521,6 +521,15 @@ static int coproc_mmap(struct file *fp, struct vm_area_struct *vma) return -EINVAL; } + /* + * Map complete page to the paste address. So the user + * space should pass 0ULL to the offset parameter. + */ + if (vma->vm_pgoff) { + pr_debug("Page offset unsupported to map paste address\n"); + return -EINVAL; + } + /* Ensure instance has an open send window */ if (!txwin) { pr_err("No send window open?\n"); From d9db3a941270d92bbd1a6a6b54a10324484f2f2d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 21 May 2025 19:34:55 -0500 Subject: [PATCH 102/497] platform/x86/amd: pmf: Use device managed allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If setting up smart PC fails for any reason then this can lead to a double free when unloading amd-pmf. This is because dev->buf was freed but never set to NULL and is again freed in amd_pmf_remove(). To avoid subtle allocation bugs in failures leading to a double free change all allocations into device managed allocations. Fixes: 5b1122fc4995f ("platform/x86/amd/pmf: fix cleanup in amd_pmf_init_smart_pc()") Link: https://lore.kernel.org/r/20250512211154.2510397-2-superm1@kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250522003457.1516679-2-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/core.c | 3 +- drivers/platform/x86/amd/pmf/tee-if.c | 56 ++++++++++----------------- 2 files changed, 22 insertions(+), 37 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 76910601cac8..ef988605c4da 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -280,7 +280,7 @@ int amd_pmf_set_dram_addr(struct amd_pmf_dev *dev, bool alloc_buffer) dev_err(dev->dev, "Invalid CPU id: 0x%x", dev->cpu_id); } - dev->buf = kzalloc(dev->mtable_size, GFP_KERNEL); + dev->buf = devm_kzalloc(dev->dev, dev->mtable_size, GFP_KERNEL); if (!dev->buf) return -ENOMEM; } @@ -493,7 +493,6 @@ static void amd_pmf_remove(struct platform_device *pdev) mutex_destroy(&dev->lock); mutex_destroy(&dev->update_mutex); mutex_destroy(&dev->cb_mutex); - kfree(dev->buf); } static const struct attribute_group *amd_pmf_driver_groups[] = { diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index d3bd12ad036a..027e992b7147 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -358,30 +358,28 @@ static ssize_t amd_pmf_get_pb_data(struct file *filp, const char __user *buf, return -EINVAL; /* re-alloc to the new buffer length of the policy binary */ - new_policy_buf = memdup_user(buf, length); - if (IS_ERR(new_policy_buf)) - return PTR_ERR(new_policy_buf); + new_policy_buf = devm_kzalloc(dev->dev, length, GFP_KERNEL); + if (!new_policy_buf) + return -ENOMEM; - kfree(dev->policy_buf); + if (copy_from_user(new_policy_buf, buf, length)) { + devm_kfree(dev->dev, new_policy_buf); + return -EFAULT; + } + + devm_kfree(dev->dev, dev->policy_buf); dev->policy_buf = new_policy_buf; dev->policy_sz = length; - if (!amd_pmf_pb_valid(dev)) { - ret = -EINVAL; - goto cleanup; - } + if (!amd_pmf_pb_valid(dev)) + return -EINVAL; amd_pmf_hex_dump_pb(dev); ret = amd_pmf_start_policy_engine(dev); if (ret < 0) - goto cleanup; + return ret; return length; - -cleanup: - kfree(dev->policy_buf); - dev->policy_buf = NULL; - return ret; } static const struct file_operations pb_fops = { @@ -532,13 +530,13 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) dev->policy_base = devm_ioremap_resource(dev->dev, dev->res); if (IS_ERR(dev->policy_base)) { ret = PTR_ERR(dev->policy_base); - goto err_free_dram_buf; + goto err_cancel_work; } - dev->policy_buf = kzalloc(dev->policy_sz, GFP_KERNEL); + dev->policy_buf = devm_kzalloc(dev->dev, dev->policy_sz, GFP_KERNEL); if (!dev->policy_buf) { ret = -ENOMEM; - goto err_free_dram_buf; + goto err_cancel_work; } memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); @@ -546,21 +544,21 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) if (!amd_pmf_pb_valid(dev)) { dev_info(dev->dev, "No Smart PC policy present\n"); ret = -EINVAL; - goto err_free_policy; + goto err_cancel_work; } amd_pmf_hex_dump_pb(dev); - dev->prev_data = kzalloc(sizeof(*dev->prev_data), GFP_KERNEL); + dev->prev_data = devm_kzalloc(dev->dev, sizeof(*dev->prev_data), GFP_KERNEL); if (!dev->prev_data) { ret = -ENOMEM; - goto err_free_policy; + goto err_cancel_work; } for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); if (ret) - goto err_free_prev_data; + goto err_cancel_work; ret = amd_pmf_start_policy_engine(dev); switch (ret) { @@ -575,7 +573,7 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) default: ret = -EINVAL; amd_pmf_tee_deinit(dev); - goto err_free_prev_data; + goto err_cancel_work; } if (status) @@ -584,7 +582,7 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) if (!status && !pb_side_load) { ret = -EINVAL; - goto err_free_prev_data; + goto err_cancel_work; } if (pb_side_load) @@ -600,12 +598,6 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) if (pb_side_load && dev->esbin) amd_pmf_remove_pb(dev); amd_pmf_tee_deinit(dev); -err_free_prev_data: - kfree(dev->prev_data); -err_free_policy: - kfree(dev->policy_buf); -err_free_dram_buf: - kfree(dev->buf); err_cancel_work: cancel_delayed_work_sync(&dev->pb_work); @@ -621,11 +613,5 @@ void amd_pmf_deinit_smart_pc(struct amd_pmf_dev *dev) amd_pmf_remove_pb(dev); cancel_delayed_work_sync(&dev->pb_work); - kfree(dev->prev_data); - dev->prev_data = NULL; - kfree(dev->policy_buf); - dev->policy_buf = NULL; - kfree(dev->buf); - dev->buf = NULL; amd_pmf_tee_deinit(dev); } From 93103d56650d7a38ed37ba4041578310f82776ae Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 21 May 2025 19:34:56 -0500 Subject: [PATCH 103/497] platform/x86/amd: pmf: Prevent amd_pmf_tee_deinit() from running twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If any of the tee init fails, pass up the errors and clear the tee_ctx pointer. This will prevent cleaning up multiple times. Fixes: ac052d8c08f9d ("platform/x86/amd/pmf: Add PMF TEE interface") Suggested-by: Dan Carpenter Link: https://lore.kernel.org/r/20250512211154.2510397-3-superm1@kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250522003457.1516679-3-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 027e992b7147..76efce48a45c 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -420,12 +420,12 @@ static int amd_pmf_ta_open_session(struct tee_context *ctx, u32 *id, const uuid_ rc = tee_client_open_session(ctx, &sess_arg, NULL); if (rc < 0 || sess_arg.ret != 0) { pr_err("Failed to open TEE session err:%#x, rc:%d\n", sess_arg.ret, rc); - return rc; + return rc ?: -EINVAL; } *id = sess_arg.session; - return rc; + return 0; } static int amd_pmf_register_input_device(struct amd_pmf_dev *dev) @@ -460,7 +460,9 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid) dev->tee_ctx = tee_client_open_context(NULL, amd_pmf_amdtee_ta_match, NULL, NULL); if (IS_ERR(dev->tee_ctx)) { dev_err(dev->dev, "Failed to open TEE context\n"); - return PTR_ERR(dev->tee_ctx); + ret = PTR_ERR(dev->tee_ctx); + dev->tee_ctx = NULL; + return ret; } ret = amd_pmf_ta_open_session(dev->tee_ctx, &dev->session_id, uuid); @@ -500,9 +502,12 @@ static int amd_pmf_tee_init(struct amd_pmf_dev *dev, const uuid_t *uuid) static void amd_pmf_tee_deinit(struct amd_pmf_dev *dev) { + if (!dev->tee_ctx) + return; tee_shm_free(dev->fw_shm_pool); tee_client_close_session(dev->tee_ctx, dev->session_id); tee_client_close_context(dev->tee_ctx); + dev->tee_ctx = NULL; } int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) From d7186dfd41924ef01ef490be5b82ab63cc417b31 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 21 May 2025 19:34:57 -0500 Subject: [PATCH 104/497] platform/x86/amd: pmf: Simplify error flow in amd_pmf_init_smart_pc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5b1122fc4995f ("platform/x86/amd/pmf: fix cleanup in amd_pmf_init_smart_pc()") adjusted the error handling flow to use a ladder but this isn't actually needed because work is only scheduled in amd_pmf_start_policy_engine() and with device managed cleanups pointers for allocations don't need to be freed. Adjust the error flow to a single call to amd_pmf_deinit_smart_pc() for the cases that need to clean up. Cc: Dan Carpenter Link: https://lore.kernel.org/r/20250512211154.2510397-4-superm1@kernel.org Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250522003457.1516679-4-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmf/tee-if.c | 55 ++++++++------------------- 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c index 76efce48a45c..4f626ebcb619 100644 --- a/drivers/platform/x86/amd/pmf/tee-if.c +++ b/drivers/platform/x86/amd/pmf/tee-if.c @@ -530,64 +530,45 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_set_dram_addr(dev, true); if (ret) - goto err_cancel_work; + return ret; dev->policy_base = devm_ioremap_resource(dev->dev, dev->res); - if (IS_ERR(dev->policy_base)) { - ret = PTR_ERR(dev->policy_base); - goto err_cancel_work; - } + if (IS_ERR(dev->policy_base)) + return PTR_ERR(dev->policy_base); dev->policy_buf = devm_kzalloc(dev->dev, dev->policy_sz, GFP_KERNEL); - if (!dev->policy_buf) { - ret = -ENOMEM; - goto err_cancel_work; - } + if (!dev->policy_buf) + return -ENOMEM; memcpy_fromio(dev->policy_buf, dev->policy_base, dev->policy_sz); if (!amd_pmf_pb_valid(dev)) { dev_info(dev->dev, "No Smart PC policy present\n"); - ret = -EINVAL; - goto err_cancel_work; + return -EINVAL; } amd_pmf_hex_dump_pb(dev); dev->prev_data = devm_kzalloc(dev->dev, sizeof(*dev->prev_data), GFP_KERNEL); - if (!dev->prev_data) { - ret = -ENOMEM; - goto err_cancel_work; - } + if (!dev->prev_data) + return -ENOMEM; for (i = 0; i < ARRAY_SIZE(amd_pmf_ta_uuid); i++) { ret = amd_pmf_tee_init(dev, &amd_pmf_ta_uuid[i]); if (ret) - goto err_cancel_work; + return ret; ret = amd_pmf_start_policy_engine(dev); - switch (ret) { - case TA_PMF_TYPE_SUCCESS: - status = true; - break; - case TA_ERROR_CRYPTO_INVALID_PARAM: - case TA_ERROR_CRYPTO_BIN_TOO_LARGE: - amd_pmf_tee_deinit(dev); - status = false; - break; - default: - ret = -EINVAL; - amd_pmf_tee_deinit(dev); - goto err_cancel_work; - } - + dev_dbg(dev->dev, "start policy engine ret: %d\n", ret); + status = ret == TA_PMF_TYPE_SUCCESS; if (status) break; + amd_pmf_tee_deinit(dev); } if (!status && !pb_side_load) { ret = -EINVAL; - goto err_cancel_work; + goto err; } if (pb_side_load) @@ -595,16 +576,12 @@ int amd_pmf_init_smart_pc(struct amd_pmf_dev *dev) ret = amd_pmf_register_input_device(dev); if (ret) - goto err_pmf_remove_pb; + goto err; return 0; -err_pmf_remove_pb: - if (pb_side_load && dev->esbin) - amd_pmf_remove_pb(dev); - amd_pmf_tee_deinit(dev); -err_cancel_work: - cancel_delayed_work_sync(&dev->pb_work); +err: + amd_pmf_deinit_smart_pc(dev); return ret; } From a2f32c7467e843727f20cae0e9b1545e1504a977 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Mon, 9 Jun 2025 13:46:55 -0500 Subject: [PATCH 105/497] platform/x86: dell_rbu: Fix lock context warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a sparse lock context warning. Signed-off-by: Stuart Hayes Link: https://lore.kernel.org/r/20250609184659.7210-2-stuart.w.hayes@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell_rbu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell_rbu.c b/drivers/platform/x86/dell/dell_rbu.c index e30ca325938c..7b019fb72e86 100644 --- a/drivers/platform/x86/dell/dell_rbu.c +++ b/drivers/platform/x86/dell/dell_rbu.c @@ -91,7 +91,7 @@ static void init_packet_head(void) rbu_data.imagesize = 0; } -static int create_packet(void *data, size_t length) +static int create_packet(void *data, size_t length) __must_hold(&rbu_data.lock) { struct packet_data *newpacket; int ordernum = 0; From 61ce04601e0d8265ec6d2ffa6df5a7e1bce64854 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Mon, 9 Jun 2025 13:46:56 -0500 Subject: [PATCH 106/497] platform/x86: dell_rbu: Fix list usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the correct list head to list_for_each_entry*() when looping through the packet list. Without this patch, reading the packet data via sysfs will show the data incorrectly (because it starts at the wrong packet), and clearing the packet list will result in a NULL pointer dereference. Fixes: d19f359fbdc6 ("platform/x86: dell_rbu: don't open code list_for_each_entry*()") Signed-off-by: Stuart Hayes Link: https://lore.kernel.org/r/20250609184659.7210-3-stuart.w.hayes@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell_rbu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/dell/dell_rbu.c b/drivers/platform/x86/dell/dell_rbu.c index 7b019fb72e86..722979b19e0e 100644 --- a/drivers/platform/x86/dell/dell_rbu.c +++ b/drivers/platform/x86/dell/dell_rbu.c @@ -292,7 +292,7 @@ static int packet_read_list(char *data, size_t * pread_length) remaining_bytes = *pread_length; bytes_read = rbu_data.packet_read_count; - list_for_each_entry(newpacket, (&packet_data_head.list)->next, list) { + list_for_each_entry(newpacket, &packet_data_head.list, list) { bytes_copied = do_packet_read(pdest, newpacket, remaining_bytes, bytes_read, &temp_count); remaining_bytes -= bytes_copied; @@ -315,7 +315,7 @@ static void packet_empty_list(void) { struct packet_data *newpacket, *tmp; - list_for_each_entry_safe(newpacket, tmp, (&packet_data_head.list)->next, list) { + list_for_each_entry_safe(newpacket, tmp, &packet_data_head.list, list) { list_del(&newpacket->list); /* From f4b0fa38d5fefe9aed6ed831f3bd3538c168ee19 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Mon, 9 Jun 2025 13:46:58 -0500 Subject: [PATCH 107/497] platform/x86: dell_rbu: Stop overwriting data buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dell_rbu driver will use memset() to clear the data held by each packet when it is no longer needed (when the driver is unloaded, the packet size is changed, etc). The amount of memory that is cleared (before this patch) is the normal packet size. However, the last packet in the list may be smaller. Fix this to only clear the memory actually used by each packet, to prevent it from writing past the end of data buffer. Because the packet data buffers are allocated with __get_free_pages() (in page-sized increments), this bug could only result in a buffer being overwritten when a packet size larger than one page is used. The only user of the dell_rbu module should be the Dell BIOS update program, which uses a packet size of 4096, so no issues should be seen without the patch, it just blocks the possiblity. Fixes: 6c54c28e69f2 ("[PATCH] dell_rbu: new Dell BIOS update driver") Signed-off-by: Stuart Hayes Link: https://lore.kernel.org/r/20250609184659.7210-5-stuart.w.hayes@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell_rbu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell_rbu.c b/drivers/platform/x86/dell/dell_rbu.c index 722979b19e0e..eb3534ff8dad 100644 --- a/drivers/platform/x86/dell/dell_rbu.c +++ b/drivers/platform/x86/dell/dell_rbu.c @@ -322,7 +322,7 @@ static void packet_empty_list(void) * zero out the RBU packet memory before freeing * to make sure there are no stale RBU packets left in memory */ - memset(newpacket->data, 0, rbu_data.packetsize); + memset(newpacket->data, 0, newpacket->length); set_memory_wb((unsigned long)newpacket->data, 1 << newpacket->ordernum); free_pages((unsigned long) newpacket->data, From ea7af94548159c57747aa8e86408a19192a1e1fe Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Mon, 9 Jun 2025 13:46:59 -0500 Subject: [PATCH 108/497] platform/x86: dell_rbu: Bump version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bump the module version. Signed-off-by: Stuart Hayes Link: https://lore.kernel.org/r/20250609184659.7210-6-stuart.w.hayes@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell_rbu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell_rbu.c b/drivers/platform/x86/dell/dell_rbu.c index eb3534ff8dad..9dd9f2cb074f 100644 --- a/drivers/platform/x86/dell/dell_rbu.c +++ b/drivers/platform/x86/dell/dell_rbu.c @@ -45,7 +45,7 @@ MODULE_AUTHOR("Abhay Salunke "); MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems"); MODULE_LICENSE("GPL"); -MODULE_VERSION("3.2"); +MODULE_VERSION("3.3"); #define BIOS_SCAN_LIMIT 0xffffffff #define MAX_IMAGE_LENGTH 16 From 3fbf25ecf8b7b524b4774b427657d30a24e696ef Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Jun 2025 16:35:57 +0200 Subject: [PATCH 109/497] MAINTAINERS: .mailmap: Update Hans de Goede's email address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm moving all my kernel work over to using my kernel.org email address. Update .mailmap and MAINTAINER entries still using hdegoede@redhat.com. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20250609143558.42941-2-hansg@kernel.org Signed-off-by: Ilpo Järvinen --- .mailmap | 1 + MAINTAINERS | 72 ++++++++++++++++++++++++++--------------------------- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/.mailmap b/.mailmap index c8a21d72241f..645294560b19 100644 --- a/.mailmap +++ b/.mailmap @@ -282,6 +282,7 @@ Gustavo Padovan Gustavo Padovan Hamza Mahfooz Hanjun Guo +Hans de Goede Hans Verkuil Hans Verkuil Harry Yoo <42.hyeyoo@gmail.com> diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..e8f3dc93a569 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -207,7 +207,7 @@ X: arch/*/include/uapi/ X: include/uapi/ ABIT UGURU 1,2 HARDWARE MONITOR DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-hwmon@vger.kernel.org S: Maintained F: drivers/hwmon/abituguru.c @@ -371,7 +371,7 @@ S: Maintained F: drivers/platform/x86/quickstart.c ACPI SERIAL MULTI INSTANTIATE DRIVER -M: Hans de Goede +M: Hans de Goede L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/serial-multi-instantiate.c @@ -3551,7 +3551,7 @@ F: arch/arm64/boot/Makefile F: scripts/make_fit.py ARM64 PLATFORM DRIVERS -M: Hans de Goede +M: Hans de Goede M: Ilpo Järvinen R: Bryan O'Donoghue L: platform-driver-x86@vger.kernel.org @@ -3712,7 +3712,7 @@ F: drivers/platform/x86/asus*.c F: drivers/platform/x86/eeepc*.c ASUS TF103C DOCK DRIVER -M: Hans de Goede +M: Hans de Goede L: platform-driver-x86@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git @@ -5613,14 +5613,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git F: drivers/usb/chipidea/ CHIPONE ICN8318 I2C TOUCHSCREEN DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml F: drivers/input/touchscreen/chipone_icn8318.c CHIPONE ICN8505 I2C TOUCHSCREEN DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/chipone_icn8505.c @@ -6917,7 +6917,7 @@ F: include/dt-bindings/pmu/exynos_ppmu.h F: include/linux/devfreq-event.h DEVICE RESOURCE MANAGEMENT HELPERS -M: Hans de Goede +M: Hans de Goede R: Matti Vaittinen S: Maintained F: include/linux/devm-helpers.h @@ -7516,7 +7516,7 @@ F: drivers/gpu/drm/gud/ F: include/drm/gud.h DRM DRIVER FOR GRAIN MEDIA GM12U320 PROJECTORS -M: Hans de Goede +M: Hans de Goede S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git F: drivers/gpu/drm/tiny/gm12u320.c @@ -7916,7 +7916,7 @@ F: drivers/gpu/drm/ci/xfails/vkms* F: drivers/gpu/drm/vkms/ DRM DRIVER FOR VIRTUALBOX VIRTUAL GPU -M: Hans de Goede +M: Hans de Goede L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -8317,7 +8317,7 @@ F: drivers/gpu/drm/panel/ F: include/drm/drm_panel.h DRM PRIVACY-SCREEN CLASS -M: Hans de Goede +M: Hans de Goede L: dri-devel@lists.freedesktop.org S: Maintained T: git https://gitlab.freedesktop.org/drm/misc/kernel.git @@ -10221,7 +10221,7 @@ S: Maintained F: Documentation/devicetree/bindings/connector/gocontroll,moduline-module-slot.yaml GOODIX TOUCHSCREEN -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained F: drivers/input/touchscreen/goodix* @@ -10260,7 +10260,7 @@ F: include/dt-bindings/clock/google,gs101.h K: [gG]oogle.?[tT]ensor GPD POCKET FAN DRIVER -M: Hans de Goede +M: Hans de Goede L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/gpd-pocket-fan.c @@ -11421,7 +11421,7 @@ F: drivers/i2c/busses/i2c-via.c F: drivers/i2c/busses/i2c-viapro.c I2C/SMBUS INTEL CHT WHISKEY COVE PMIC DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-cht-wc.c @@ -12011,13 +12011,13 @@ S: Supported F: sound/soc/intel/ INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER -M: Hans de Goede +M: Hans de Goede L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/atomisp2/pm.c INTEL ATOMISP2 LED DRIVER -M: Hans de Goede +M: Hans de Goede L: platform-driver-x86@vger.kernel.org S: Maintained F: drivers/platform/x86/intel/atomisp2/led.c @@ -13678,7 +13678,7 @@ S: Maintained F: drivers/platform/x86/lenovo-wmi-hotkey-utilities.c LETSKETCH HID TABLET DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -13728,7 +13728,7 @@ F: drivers/ata/sata_gemini.c F: drivers/ata/sata_gemini.h LIBATA SATA AHCI PLATFORM devices support -M: Hans de Goede +M: Hans de Goede L: linux-ide@vger.kernel.org S: Maintained F: drivers/ata/ahci_platform.c @@ -14098,7 +14098,7 @@ F: Documentation/admin-guide/ldm.rst F: block/partitions/ldm.* LOGITECH HID GAMING KEYBOARDS -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git @@ -14780,7 +14780,7 @@ F: Documentation/devicetree/bindings/power/supply/maxim,max17040.yaml F: drivers/power/supply/max17040_battery.c MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS -R: Hans de Goede +R: Hans de Goede R: Krzysztof Kozlowski R: Marek Szyprowski R: Sebastian Krzyszkowiak @@ -15582,7 +15582,7 @@ Q: https://patchwork.kernel.org/project/netdevbpf/list/ F: drivers/net/ethernet/mellanox/mlxfw/ MELLANOX HARDWARE PLATFORM SUPPORT -M: Hans de Goede +M: Hans de Goede M: Ilpo Järvinen M: Vadim Pasternak L: platform-driver-x86@vger.kernel.org @@ -16538,7 +16538,7 @@ S: Maintained F: drivers/platform/surface/surface_gpe.c MICROSOFT SURFACE HARDWARE PLATFORM SUPPORT -M: Hans de Goede +M: Hans de Goede M: Ilpo Järvinen M: Maximilian Luz L: platform-driver-x86@vger.kernel.org @@ -17706,7 +17706,7 @@ F: tools/include/nolibc/ F: tools/testing/selftests/nolibc/ NOVATEK NVT-TS I2C TOUCHSCREEN DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/touchscreen/novatek,nvt-ts.yaml @@ -22708,7 +22708,7 @@ K: fu[57]40 K: [^@]sifive SILEAD TOUCHSCREEN DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org L: platform-driver-x86@vger.kernel.org S: Maintained @@ -22741,7 +22741,7 @@ F: Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml F: drivers/i3c/master/svc-i3c-master.c SIMPLEFB FB DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/display/simple-framebuffer.yaml @@ -22870,7 +22870,7 @@ F: Documentation/hwmon/emc2103.rst F: drivers/hwmon/emc2103.c SMSC SCH5627 HARDWARE MONITOR DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-hwmon@vger.kernel.org S: Supported F: Documentation/hwmon/sch5627.rst @@ -23525,7 +23525,7 @@ S: Supported F: Documentation/process/stable-kernel-rules.rst STAGING - ATOMISP DRIVER -M: Hans de Goede +M: Hans de Goede M: Mauro Carvalho Chehab R: Sakari Ailus L: linux-media@vger.kernel.org @@ -23822,7 +23822,7 @@ F: arch/m68k/sun3*/ F: drivers/net/ethernet/i825xx/sun3* SUN4I LOW RES ADC ATTACHED TABLET KEYS DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml @@ -25590,7 +25590,7 @@ F: Documentation/hid/hiddev.rst F: drivers/hid/usbhid/ USB INTEL XHCI ROLE MUX DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/roles/intel-xhci-usb-role-switch.c @@ -25781,7 +25781,7 @@ F: Documentation/firmware-guide/acpi/intel-pmc-mux.rst F: drivers/usb/typec/mux/intel_pmc_mux.c USB TYPEC PI3USB30532 MUX DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/typec/mux/pi3usb30532.c @@ -25810,7 +25810,7 @@ F: drivers/usb/host/uhci* USB VIDEO CLASS M: Laurent Pinchart -M: Hans de Goede +M: Hans de Goede L: linux-media@vger.kernel.org S: Maintained W: http://www.ideasonboard.org/uvc/ @@ -26341,7 +26341,7 @@ F: include/uapi/linux/virtio_snd.h F: sound/virtio/* VIRTUAL BOX GUEST DEVICE DRIVER -M: Hans de Goede +M: Hans de Goede M: Arnd Bergmann M: Greg Kroah-Hartman S: Maintained @@ -26350,7 +26350,7 @@ F: include/linux/vbox_utils.h F: include/uapi/linux/vbox*.h VIRTUAL BOX SHARED FOLDER VFS DRIVER -M: Hans de Goede +M: Hans de Goede L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/vboxsf/* @@ -26604,7 +26604,7 @@ F: drivers/mmc/host/wbsd.* WACOM PROTOCOL 4 SERIAL TABLETS M: Julian Squires -M: Hans de Goede +M: Hans de Goede L: linux-input@vger.kernel.org S: Maintained F: drivers/input/tablet/wacom_serial4.c @@ -26771,7 +26771,7 @@ F: include/linux/wwan.h F: include/uapi/linux/wwan.h X-POWERS AXP288 PMIC DRIVERS -M: Hans de Goede +M: Hans de Goede S: Maintained F: drivers/acpi/pmic/intel_pmic_xpower.c N: axp288 @@ -26863,14 +26863,14 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm F: arch/x86/mm/ X86 PLATFORM ANDROID TABLETS DSDT FIXUP DRIVER -M: Hans de Goede +M: Hans de Goede L: platform-driver-x86@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git F: drivers/platform/x86/x86-android-tablets/ X86 PLATFORM DRIVERS -M: Hans de Goede +M: Hans de Goede M: Ilpo Järvinen L: platform-driver-x86@vger.kernel.org S: Maintained From e0d4a0f1d066f14522049e827107a577444d9183 Mon Sep 17 00:00:00 2001 From: Yang Shen Date: Thu, 29 May 2025 11:40:23 +0800 Subject: [PATCH 110/497] MAINTAINERS: Update HiSilicon GPIO driver maintainer Add Yang Shen as the maintainer of the HiSilicon GPIO driver, replacing Jay Fang. Signed-off-by: Yang Shen Reviewed-by: Jay Fang Link: https://lore.kernel.org/r/20250529034023.3780376-1-shenyang39@huawei.com Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..04734120b522 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10839,7 +10839,7 @@ S: Maintained F: drivers/dma/hisi_dma.c HISILICON GPIO DRIVER -M: Jay Fang +M: Yang Shen L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml From c0317ad44f45b3c1f0ff46a4e28d14c7bccdedf4 Mon Sep 17 00:00:00 2001 From: Gabriel Dalimonte Date: Sun, 1 Jun 2025 12:45:36 -0400 Subject: [PATCH 111/497] drm/vc4: fix infinite EPROBE_DEFER loop `vc4_hdmi_audio_init` calls `devm_snd_dmaengine_pcm_register` which may return EPROBE_DEFER. Calling `drm_connector_hdmi_audio_init` adds a child device. The driver model docs[1] state that adding a child device prior to returning EPROBE_DEFER may result in an infinite loop. [1] https://www.kernel.org/doc/html/v6.14/driver-api/driver-model/driver.html Fixes: 9640f1437a88 ("drm/vc4: hdmi: switch to using generic HDMI Codec infrastructure") Signed-off-by: Gabriel Dalimonte Link: https://lore.kernel.org/r/20250601-vc4-audio-inf-probe-v2-1-9ad43c7b6147@gmail.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vc4/vc4_hdmi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index a29a6ef266f9..163d092bd973 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -560,12 +560,6 @@ static int vc4_hdmi_connector_init(struct drm_device *dev, if (ret) return ret; - ret = drm_connector_hdmi_audio_init(connector, dev->dev, - &vc4_hdmi_audio_funcs, - 8, false, -1); - if (ret) - return ret; - drm_connector_helper_add(connector, &vc4_hdmi_connector_helper_funcs); /* @@ -2291,6 +2285,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) return ret; } + ret = drm_connector_hdmi_audio_init(&vc4_hdmi->connector, dev, + &vc4_hdmi_audio_funcs, 8, false, + -1); + if (ret) + return ret; + dai_link->cpus = &vc4_hdmi->audio.cpu; dai_link->codecs = &vc4_hdmi->audio.codec; dai_link->platforms = &vc4_hdmi->audio.platform; From 4823a58093c6dfa20df62b5c18da613621b9716e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 10 Jun 2025 16:38:56 +0530 Subject: [PATCH 112/497] cpufreq: Convert `/// SAFETY` lines to `# Safety` sections Replace `/// SAFETY` comments in doc comments with proper `# Safety` sections, as per rustdoc conventions. Also mark the C FFI callbacks as `unsafe` to correctly reflect their safety requirements. Reported-by: Miguel Ojeda Closes: https://github.com/Rust-for-Linux/linux/issues/1169 Signed-off-by: Viresh Kumar --- rust/kernel/cpufreq.rs | 146 ++++++++++++++++++++++++++++++----------- 1 file changed, 109 insertions(+), 37 deletions(-) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index b0a9c6182aec..9b995f18aac6 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -1055,8 +1055,11 @@ pub fn new_foreign_owned(dev: &Device) -> Result { impl Registration { /// Driver's `init` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1070,8 +1073,11 @@ extern "C" fn init_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi:: /// Driver's `exit` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1082,8 +1088,11 @@ extern "C" fn exit_callback(ptr: *mut bindings::cpufreq_policy) { /// Driver's `online` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1094,8 +1103,13 @@ extern "C" fn online_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi /// Driver's `offline` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn offline_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn offline_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1106,8 +1120,13 @@ extern "C" fn offline_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ff /// Driver's `suspend` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn suspend_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn suspend_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1118,8 +1137,11 @@ extern "C" fn suspend_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ff /// Driver's `resume` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1130,8 +1152,11 @@ extern "C" fn resume_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi /// Driver's `ready` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1140,8 +1165,13 @@ extern "C" fn ready_callback(ptr: *mut bindings::cpufreq_policy) { /// Driver's `verify` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn verify_callback(ptr: *mut bindings::cpufreq_policy_data) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn verify_callback( + ptr: *mut bindings::cpufreq_policy_data, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1152,8 +1182,13 @@ extern "C" fn verify_callback(ptr: *mut bindings::cpufreq_policy_data) -> kernel /// Driver's `setpolicy` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn setpolicy_callback(ptr: *mut bindings::cpufreq_policy) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn setpolicy_callback( + ptr: *mut bindings::cpufreq_policy, + ) -> kernel::ffi::c_int { from_result(|| { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. @@ -1164,8 +1199,11 @@ extern "C" fn setpolicy_callback(ptr: *mut bindings::cpufreq_policy) -> kernel:: /// Driver's `target` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_callback( ptr: *mut bindings::cpufreq_policy, target_freq: u32, relation: u32, @@ -1180,8 +1218,11 @@ extern "C" fn target_callback( /// Driver's `target_index` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_index_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_index_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_int { @@ -1200,8 +1241,11 @@ extern "C" fn target_index_callback( /// Driver's `fast_switch` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn fast_switch_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn fast_switch_callback( ptr: *mut bindings::cpufreq_policy, target_freq: u32, ) -> kernel::ffi::c_uint { @@ -1212,7 +1256,11 @@ extern "C" fn fast_switch_callback( } /// Driver's `adjust_perf` callback. - extern "C" fn adjust_perf_callback( + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + unsafe extern "C" fn adjust_perf_callback( cpu: u32, min_perf: usize, target_perf: usize, @@ -1225,8 +1273,11 @@ extern "C" fn adjust_perf_callback( /// Driver's `get_intermediate` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn get_intermediate_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn get_intermediate_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_uint { @@ -1243,8 +1294,11 @@ extern "C" fn get_intermediate_callback( /// Driver's `target_intermediate` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn target_intermediate_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn target_intermediate_callback( ptr: *mut bindings::cpufreq_policy, index: u32, ) -> kernel::ffi::c_int { @@ -1262,12 +1316,21 @@ extern "C" fn target_intermediate_callback( } /// Driver's `get` callback. - extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + unsafe extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { PolicyCpu::from_cpu(cpu).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) } /// Driver's `update_limit` callback. - extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { + /// + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; @@ -1276,8 +1339,11 @@ extern "C" fn update_limits_callback(ptr: *mut bindings::cpufreq_policy) { /// Driver's `bios_limit` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { from_result(|| { let mut policy = PolicyCpu::from_cpu(cpu as u32)?; @@ -1288,8 +1354,11 @@ extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_i /// Driver's `set_boost` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn set_boost_callback( + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn set_boost_callback( ptr: *mut bindings::cpufreq_policy, state: i32, ) -> kernel::ffi::c_int { @@ -1303,8 +1372,11 @@ extern "C" fn set_boost_callback( /// Driver's `register_em` callback. /// - /// SAFETY: Called from C. Inputs must be valid pointers. - extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) { + /// # Safety + /// + /// - This function may only be called from the cpufreq C infrastructure. + /// - The pointer arguments must be valid pointers. + unsafe extern "C" fn register_em_callback(ptr: *mut bindings::cpufreq_policy) { // SAFETY: The `ptr` is guaranteed to be valid by the contract with the C code for the // lifetime of `policy`. let policy = unsafe { Policy::from_raw_mut(ptr) }; From d29fc02caad7f94b62d56ee1b01c954f9c961ba7 Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Mon, 19 May 2025 11:49:45 +0300 Subject: [PATCH 113/497] ata: pata_via: Force PIO for ATAPI devices on VT6415/VT6330 The controller has a hardware bug that can hard hang the system when doing ATAPI DMAs without any trace of what happened. Depending on the device attached, it can also prevent the system from booting. In this case, the system hangs when reading the ATIP from optical media with cdrecord -vvv -atip on an _NEC DVD_RW ND-4571A 1-01 and an Optiarc DVD RW AD-7200A 1.06 attached to an ASRock 990FX Extreme 4, running at UDMA/33. The issue can be reproduced by running the same command with a cygwin build of cdrecord on WinXP, although it requires more attempts to cause it. The hang in that case is also resolved by forcing PIO. It doesn't appear that VIA has produced any drivers for that OS, thus no known workaround exists. HDDs attached to the controller do not suffer from any DMA issues. Cc: stable@vger.kernel.org Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/916677 Signed-off-by: Tasos Sahanidis Link: https://lore.kernel.org/r/20250519085508.1398701-1-tasos@tasossah.com Signed-off-by: Niklas Cassel --- drivers/ata/pata_via.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 696b99720dcb..d82728a01832 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -368,7 +368,8 @@ static unsigned int via_mode_filter(struct ata_device *dev, unsigned int mask) } if (dev->class == ATA_DEV_ATAPI && - dmi_check_system(no_atapi_dma_dmi_table)) { + (dmi_check_system(no_atapi_dma_dmi_table) || + config->id == PCI_DEVICE_ID_VIA_6415)) { ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n"); mask &= ATA_MASK_PIO; } From 33877220b8641b4cde474a4229ea92c0e3637883 Mon Sep 17 00:00:00 2001 From: Tasos Sahanidis Date: Mon, 19 May 2025 11:56:55 +0300 Subject: [PATCH 114/497] ata: libata-acpi: Do not assume 40 wire cable if no devices are enabled On at least an ASRock 990FX Extreme 4 with a VIA VT6330, the devices have not yet been enabled by the first time ata_acpi_cbl_80wire() is called. This means that the ata_for_each_dev loop is never entered, and a 40 wire cable is assumed. The VIA controller on this board does not report the cable in the PCI config space, thus having to fall back to ACPI even though no SATA bridge is present. The _GTM values are correctly reported by the firmware through ACPI, which has already set up faster transfer modes, but due to the above the controller is forced down to a maximum of UDMA/33. Resolve this by modifying ata_acpi_cbl_80wire() to directly return the cable type. First, an unknown cable is assumed which preserves the mode set by the firmware, and then on subsequent calls when the devices have been enabled, an 80 wire cable is correctly detected. Since the function now directly returns the cable type, it is renamed to ata_acpi_cbl_pata_type(). Signed-off-by: Tasos Sahanidis Link: https://lore.kernel.org/r/20250519085945.1399466-1-tasos@tasossah.com Signed-off-by: Niklas Cassel --- drivers/ata/libata-acpi.c | 24 ++++++++++++++++-------- drivers/ata/pata_via.c | 6 ++---- include/linux/libata.h | 7 +++---- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index b7f0bf795521..f2140fc06ba0 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -514,15 +514,19 @@ unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, EXPORT_SYMBOL_GPL(ata_acpi_gtm_xfermask); /** - * ata_acpi_cbl_80wire - Check for 80 wire cable + * ata_acpi_cbl_pata_type - Return PATA cable type * @ap: Port to check - * @gtm: GTM data to use * - * Return 1 if the @gtm indicates the BIOS selected an 80wire mode. + * Return ATA_CBL_PATA* according to the transfer mode selected by BIOS */ -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm) +int ata_acpi_cbl_pata_type(struct ata_port *ap) { struct ata_device *dev; + int ret = ATA_CBL_PATA_UNK; + const struct ata_acpi_gtm *gtm = ata_acpi_init_gtm(ap); + + if (!gtm) + return ATA_CBL_PATA40; ata_for_each_dev(dev, &ap->link, ENABLED) { unsigned int xfer_mask, udma_mask; @@ -530,13 +534,17 @@ int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm) xfer_mask = ata_acpi_gtm_xfermask(dev, gtm); ata_unpack_xfermask(xfer_mask, NULL, NULL, &udma_mask); - if (udma_mask & ~ATA_UDMA_MASK_40C) - return 1; + ret = ATA_CBL_PATA40; + + if (udma_mask & ~ATA_UDMA_MASK_40C) { + ret = ATA_CBL_PATA80; + break; + } } - return 0; + return ret; } -EXPORT_SYMBOL_GPL(ata_acpi_cbl_80wire); +EXPORT_SYMBOL_GPL(ata_acpi_cbl_pata_type); static void ata_acpi_gtf_to_tf(struct ata_device *dev, const struct ata_acpi_gtf *gtf, diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index d82728a01832..bb80e7800dcb 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -201,11 +201,9 @@ static int via_cable_detect(struct ata_port *ap) { two drives */ if (ata66 & (0x10100000 >> (16 * ap->port_no))) return ATA_CBL_PATA80; + /* Check with ACPI so we can spot BIOS reported SATA bridges */ - if (ata_acpi_init_gtm(ap) && - ata_acpi_cbl_80wire(ap, ata_acpi_init_gtm(ap))) - return ATA_CBL_PATA80; - return ATA_CBL_PATA40; + return ata_acpi_cbl_pata_type(ap); } static int via_pre_reset(struct ata_link *link, unsigned long deadline) diff --git a/include/linux/libata.h b/include/linux/libata.h index 31be45fd47a6..1e5aec839041 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1352,7 +1352,7 @@ int ata_acpi_stm(struct ata_port *ap, const struct ata_acpi_gtm *stm); int ata_acpi_gtm(struct ata_port *ap, struct ata_acpi_gtm *stm); unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, const struct ata_acpi_gtm *gtm); -int ata_acpi_cbl_80wire(struct ata_port *ap, const struct ata_acpi_gtm *gtm); +int ata_acpi_cbl_pata_type(struct ata_port *ap); #else static inline const struct ata_acpi_gtm *ata_acpi_init_gtm(struct ata_port *ap) { @@ -1377,10 +1377,9 @@ static inline unsigned int ata_acpi_gtm_xfermask(struct ata_device *dev, return 0; } -static inline int ata_acpi_cbl_80wire(struct ata_port *ap, - const struct ata_acpi_gtm *gtm) +static inline int ata_acpi_cbl_pata_type(struct ata_port *ap) { - return 0; + return ATA_CBL_PATA40; } #endif From d17e61ab63fb7747b340d6a66bf1408cd5c6562b Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Fri, 6 Jun 2025 22:37:29 +0200 Subject: [PATCH 115/497] drm/meson: fix debug log statement when setting the HDMI clocks The "phy" and "vclk" frequency labels were swapped, making it more difficult to debug driver errors. Swap the label order to make them match with the actual frequencies printed to correct this. Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250606203729.3311592-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_encoder_hdmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 47136bbbe8c6..ab08d690d882 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -109,7 +109,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi, venc_freq /= 2; dev_dbg(priv->dev, - "vclk:%lluHz phy=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", + "phy:%lluHz vclk=%lluHz venc=%lluHz hdmi=%lluHz enci=%d\n", phy_freq, vclk_freq, venc_freq, hdmi_freq, priv->venc.hdmi_use_enci); From faf2f8382088e8c74bd6eeb236c8c9190e61615e Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 7 Jun 2025 00:10:31 +0200 Subject: [PATCH 116/497] drm/meson: use vclk_freq instead of pixel_freq in debug print meson_vclk_vic_supported_freq() has a debug print which includes the pixel freq. However, within the whole function the pixel freq is irrelevant, other than checking the end of the params array. Switch to printing the vclk_freq which is being compared / matched against the inputs to the function to avoid confusion when analyzing error reports from users. Fixes: e5fab2ec9ca4 ("drm/meson: vclk: add support for YUV420 setup") Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250606221031.3419353-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_vclk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index 3325580d885d..c4123bb958e4 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -790,9 +790,9 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d pixel_freq = %lluHz alt = %lluHz\n", - i, params[i].pixel_freq, - PIXEL_FREQ_1000_1001(params[i].pixel_freq)); + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + i, params[i].vclk_freq, + PIXEL_FREQ_1000_1001(params[i].vclk_freq)); DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", i, params[i].phy_freq, PHY_FREQ_1000_1001(params[i].phy_freq)); From 0cee6c4d3518b2e757aedae78771f17149f57653 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Mon, 9 Jun 2025 22:27:51 +0200 Subject: [PATCH 117/497] drm/meson: fix more rounding issues with 59.94Hz modes Commit 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") attempts to resolve video playback using 59.94Hz. using YUV420 by changing the clock calculation to use Hz instead of kHz (thus yielding more precision). The basic calculation itself is correct, however the comparisions in meson_vclk_vic_supported_freq() and meson_vclk_setup() don't work anymore for 59.94Hz modes (using the freq * 1000 / 1001 logic). For example, drm/edid specifies a 593407kHz clock for 3840x2160@59.94Hz. With the mentioend commit we convert this to Hz. Then meson_vclk tries to find a matchig "params" entry (as the clock setup code currently only supports specific frequencies) by taking the venc_freq from the params and calculating the "alt frequency" (used for the 59.94Hz modes) from it, which is: (594000000Hz * 1000) / 1001 = 593406593Hz Similar calculation is applied to the phy_freq (TMDS clock), which is 10 times the pixel clock. Implement a new meson_vclk_freqs_are_matching_param() function whose purpose is to compare if the requested and calculated frequencies. They may not match exactly (for the reasons mentioned above). Allow the clocks to deviate slightly to make the 59.94Hz modes again. Fixes: 1017560164b6 ("drm/meson: use unsigned long long / Hz for frequency types") Reported-by: Christian Hewitt Signed-off-by: Martin Blumenstingl Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20250609202751.962208-1-martin.blumenstingl@googlemail.com --- drivers/gpu/drm/meson/meson_vclk.c | 55 ++++++++++++++++++------------ 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c index c4123bb958e4..dfe0c28a0f05 100644 --- a/drivers/gpu/drm/meson/meson_vclk.c +++ b/drivers/gpu/drm/meson/meson_vclk.c @@ -110,10 +110,7 @@ #define HDMI_PLL_LOCK BIT(31) #define HDMI_PLL_LOCK_G12A (3 << 30) -#define PIXEL_FREQ_1000_1001(_freq) \ - DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) -#define PHY_FREQ_1000_1001(_freq) \ - (PIXEL_FREQ_1000_1001(DIV_ROUND_DOWN_ULL(_freq, 10ULL)) * 10) +#define FREQ_1000_1001(_freq) DIV_ROUND_CLOSEST_ULL((_freq) * 1000ULL, 1001ULL) /* VID PLL Dividers */ enum { @@ -772,6 +769,36 @@ static void meson_hdmi_pll_generic_set(struct meson_drm *priv, pll_freq); } +static bool meson_vclk_freqs_are_matching_param(unsigned int idx, + unsigned long long phy_freq, + unsigned long long vclk_freq) +{ + DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", + idx, params[idx].vclk_freq, + FREQ_1000_1001(params[idx].vclk_freq)); + DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", + idx, params[idx].phy_freq, + FREQ_1000_1001(params[idx].phy_freq)); + + /* Match strict frequency */ + if (phy_freq == params[idx].phy_freq && + vclk_freq == params[idx].vclk_freq) + return true; + + /* Match 1000/1001 variant: vclk deviation has to be less than 1kHz + * (drm EDID is defined in 1kHz steps, so everything smaller must be + * rounding error) and the PHY freq deviation has to be less than + * 10kHz (as the TMDS clock is 10 times the pixel clock, so anything + * smaller must be rounding error as well). + */ + if (abs(vclk_freq - FREQ_1000_1001(params[idx].vclk_freq)) < 1000 && + abs(phy_freq - FREQ_1000_1001(params[idx].phy_freq)) < 10000) + return true; + + /* no match */ + return false; +} + enum drm_mode_status meson_vclk_vic_supported_freq(struct meson_drm *priv, unsigned long long phy_freq, @@ -790,19 +817,7 @@ meson_vclk_vic_supported_freq(struct meson_drm *priv, } for (i = 0 ; params[i].pixel_freq ; ++i) { - DRM_DEBUG_DRIVER("i = %d vclk_freq = %lluHz alt = %lluHz\n", - i, params[i].vclk_freq, - PIXEL_FREQ_1000_1001(params[i].vclk_freq)); - DRM_DEBUG_DRIVER("i = %d phy_freq = %lluHz alt = %lluHz\n", - i, params[i].phy_freq, - PHY_FREQ_1000_1001(params[i].phy_freq)); - /* Match strict frequency */ - if (phy_freq == params[i].phy_freq && - vclk_freq == params[i].vclk_freq) - return MODE_OK; - /* Match 1000/1001 variant */ - if (phy_freq == PHY_FREQ_1000_1001(params[i].phy_freq) && - vclk_freq == PIXEL_FREQ_1000_1001(params[i].vclk_freq)) + if (meson_vclk_freqs_are_matching_param(i, phy_freq, vclk_freq)) return MODE_OK; } @@ -1075,10 +1090,8 @@ void meson_vclk_setup(struct meson_drm *priv, unsigned int target, } for (freq = 0 ; params[freq].pixel_freq ; ++freq) { - if ((phy_freq == params[freq].phy_freq || - phy_freq == PHY_FREQ_1000_1001(params[freq].phy_freq)) && - (vclk_freq == params[freq].vclk_freq || - vclk_freq == PIXEL_FREQ_1000_1001(params[freq].vclk_freq))) { + if (meson_vclk_freqs_are_matching_param(freq, phy_freq, + vclk_freq)) { if (vclk_freq != params[freq].vclk_freq) vic_alternate_clock = true; else From fe5b391fc56f77cf3c22a9dd4f0ce20db0e3533f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Jun 2025 11:01:11 +0200 Subject: [PATCH 118/497] ata: pata_cs5536: fix build on 32-bit UML On 32-bit ARCH=um, CONFIG_X86_32 is still defined, so it doesn't indicate building on real X86 machines. There's no MSR on UML though, so add a check for CONFIG_X86. Reported-by: Arnd Bergmann Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20250606090110.15784-2-johannes@sipsolutions.net Signed-off-by: Niklas Cassel --- drivers/ata/pata_cs5536.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c index b811efd2cc34..73e81e160c91 100644 --- a/drivers/ata/pata_cs5536.c +++ b/drivers/ata/pata_cs5536.c @@ -27,7 +27,7 @@ #include #include -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86) && defined(CONFIG_X86_32) #include static int use_msr; module_param_named(msr, use_msr, int, 0644); From 5558f27a58459a4038ebb23bcb5bd40c1e345c57 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 7 Jun 2025 21:52:03 +0800 Subject: [PATCH 119/497] pinctrl: sunxi: dt: Consider pin base when calculating bank number from pin In prepare_function_table() when the pinctrl function table IRQ entries are generated, the pin bank is calculated from the absolute pin number; however the IRQ bank mux array is indexed from the first pin bank of the controller. For R_PIO controllers, this means the absolute pin bank is way off from the relative pin bank used for array indexing. Correct this by taking into account the pin base of the controller. Fixes: f5e2cd34b12f ("pinctrl: sunxi: allow reading mux values from DT") Signed-off-by: Chen-Yu Tsai Link: https://lore.kernel.org/20250607135203.2085226-1-wens@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c index 1833078f6877..4e34b0cd3b73 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi-dt.c @@ -143,7 +143,7 @@ static struct sunxi_desc_pin *init_pins_table(struct device *dev, */ static int prepare_function_table(struct device *dev, struct device_node *pnode, struct sunxi_desc_pin *pins, int npins, - const u8 *irq_bank_muxes) + unsigned pin_base, const u8 *irq_bank_muxes) { struct device_node *node; struct property *prop; @@ -166,7 +166,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, */ for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; if (irq_bank_muxes[bank]) { pin->variant++; @@ -211,7 +211,7 @@ static int prepare_function_table(struct device *dev, struct device_node *pnode, last_bank = 0; for (i = 0; i < npins; i++) { struct sunxi_desc_pin *pin = &pins[i]; - int bank = pin->pin.number / PINS_PER_BANK; + int bank = (pin->pin.number - pin_base) / PINS_PER_BANK; int lastfunc = pin->variant + 1; int irq_mux = irq_bank_muxes[bank]; @@ -353,7 +353,7 @@ int sunxi_pinctrl_dt_table_init(struct platform_device *pdev, return PTR_ERR(pins); ret = prepare_function_table(&pdev->dev, pnode, pins, desc->npins, - irq_bank_muxes); + desc->pin_base, irq_bank_muxes); if (ret) return ret; From 8a157d8a00e815cab4432653cb50c9cedbbb4931 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Jun 2025 09:33:48 -0400 Subject: [PATCH 120/497] tracing: Do not free "head" on error path of filter_free_subsystem_filters() The variable "head" is allocated and initialized as a list before allocating the first "item" for the list. If the allocation of "item" fails, it frees "head" and then jumps to the label "free_now" which will process head and free it. This will cause a UAF of "head", and it doesn't need to free it before jumping to the "free_now" label as that code will free it. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250610093348.33c5643a@gandalf.local.home Fixes: a9d0aab5eb33 ("tracing: Fix regression of filter waiting a long time on RCU synchronization") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202506070424.lCiNreTI-lkp@intel.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_filter.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index ea8b364b6818..08141f105c95 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1437,10 +1437,8 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, INIT_LIST_HEAD(&head->list); item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) { - kfree(head); + if (!item) goto free_now; - } item->filter = filter; list_add_tail(&item->list, &head->list); From a298bbab903e3fb4cbe16d36d6195e68fad1b776 Mon Sep 17 00:00:00 2001 From: Suleiman Souhlal Date: Fri, 6 Jun 2025 16:45:38 +0900 Subject: [PATCH 121/497] tools/resolve_btfids: Fix build when cross compiling kernel with clang. When cross compiling the kernel with clang, we need to override CLANG_CROSS_FLAGS when preparing the step libraries. Prior to commit d1d096312176 ("tools: fix annoying "mkdir -p ..." logs when building tools in parallel"), MAKEFLAGS would have been set to a value that wouldn't set a value for CLANG_CROSS_FLAGS, hiding the fact that we weren't properly overriding it. Fixes: 56a2df7615fa ("tools/resolve_btfids: Compile resolve_btfids as host program") Signed-off-by: Suleiman Souhlal Signed-off-by: Andrii Nakryiko Acked-by: Jiri Olsa Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20250606074538.1608546-1-suleiman@google.com --- tools/bpf/resolve_btfids/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index afbddea3a39c..ce1b556dfa90 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -17,7 +17,7 @@ endif # Overrides for the prepare step libraries. HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ - CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" RM ?= rm HOSTCC ?= gcc From a2c90d63b71223d69a813333c1abf4fdacddbbe5 Mon Sep 17 00:00:00 2001 From: Robert Malz Date: Tue, 20 May 2025 10:31:51 +0200 Subject: [PATCH 122/497] i40e: return false from i40e_reset_vf if reset is in progress The function i40e_vc_reset_vf attempts, up to 20 times, to handle a VF reset request, using the return value of i40e_reset_vf as an indicator of whether the reset was successfully triggered. Currently, i40e_reset_vf always returns true, which causes new reset requests to be ignored if a different VF reset is already in progress. This patch updates the return value of i40e_reset_vf to reflect when another VF reset is in progress, allowing the caller to properly use the retry mechanism. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Robert Malz Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 1120f8e4bb67..22d5b1ec2289 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1546,8 +1546,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * Returns true if the VF is in reset, resets successfully, or resets - * are disabled and false otherwise. + * Return: True if reset was performed successfully or if resets are disabled. + * False if reset is already in progress. **/ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { @@ -1566,7 +1566,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) /* If VF is being reset already we don't need to continue. */ if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) - return true; + return false; i40e_trigger_vf_reset(vf, flr); From fb4e9239e029954a37a00818b21e837cebf2aa10 Mon Sep 17 00:00:00 2001 From: Robert Malz Date: Tue, 20 May 2025 10:31:52 +0200 Subject: [PATCH 123/497] i40e: retry VFLR handling if there is ongoing VF reset When a VFLR interrupt is received during a VF reset initiated from a different source, the VFLR may be not fully handled. This can leave the VF in an undefined state. To address this, set the I40E_VFLR_EVENT_PENDING bit again during VFLR handling if the reset is not yet complete. This ensures the driver will properly complete the VF reset in such scenarios. Fixes: 52424f974bc5 ("i40e: Fix VF hang when reset is triggered on another VF") Signed-off-by: Robert Malz Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 22d5b1ec2289..88e6bef69342 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4328,7 +4328,10 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx)); if (reg & BIT(bit_idx)) /* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */ - i40e_reset_vf(vf, true); + if (!i40e_reset_vf(vf, true)) { + /* At least one VF did not finish resetting, retry next time */ + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + } } return 0; From 0c6f4631436ecea841f1583b98255aedd7495b18 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 24 Apr 2025 15:50:13 +0200 Subject: [PATCH 124/497] iavf: fix reset_task for early reset event If a reset event is received from the PF early in the init cycle, the state machine hangs for about 25 seconds. Reproducer: echo 1 > /sys/class/net/$PF0/device/sriov_numvfs ip link set dev $PF0 vf 0 mac $NEW_MAC The log shows: [792.620416] ice 0000:5e:00.0: Enabling 1 VFs [792.738812] iavf 0000:5e:01.0: enabling device (0000 -> 0002) [792.744182] ice 0000:5e:00.0: Enabling 1 VFs with 17 vectors and 16 queues per VF [792.839964] ice 0000:5e:00.0: Setting MAC 52:54:00:00:00:11 on VF 0. VF driver will be reinitialized [813.389684] iavf 0000:5e:01.0: Failed to communicate with PF; waiting before retry [818.635918] iavf 0000:5e:01.0: Hardware came out of reset. Attempting reinit. [818.766273] iavf 0000:5e:01.0: Multiqueue Enabled: Queue pair count = 16 Fix it by scheduling the reset task and making the reset task capable of resetting early in the init cycle. Fixes: ef8693eb90ae3 ("i40evf: refactor reset handling") Signed-off-by: Ahmed Zaki Tested-by: Przemek Kitszel Reviewed-by: Przemek Kitszel Signed-off-by: Marcin Szycik Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 11 +++++++++++ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2c0bb41809a4..81d7249d1149 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3209,6 +3209,17 @@ static void iavf_reset_task(struct work_struct *work) } continue_reset: + /* If we are still early in the state machine, just restart. */ + if (adapter->state <= __IAVF_INIT_FAILED) { + iavf_shutdown_adminq(hw); + iavf_change_state(adapter, __IAVF_STARTUP); + iavf_startup(adapter); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + netdev_unlock(netdev); + return; + } + /* We don't use netif_running() because it may be true prior to * ndo_open() returning, so we can't assume it means all our open * tasks have finished, since we're not holding the rtnl_lock here. diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index a6f0e5990be2..07f0d0a0f1e2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -79,6 +79,23 @@ iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event, return iavf_status_to_errno(status); received_op = (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high); + + if (received_op == VIRTCHNL_OP_EVENT) { + struct iavf_adapter *adapter = hw->back; + struct virtchnl_pf_event *vpe = + (struct virtchnl_pf_event *)event->msg_buf; + + if (vpe->event != VIRTCHNL_EVENT_RESET_IMPENDING) + continue; + + dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); + if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) + iavf_schedule_reset(adapter, + IAVF_FLAG_RESET_PENDING); + + return -EIO; + } + if (op_to_poll == received_op) break; } From a5a441ae283d54ec329aadc7426991dc32786d52 Mon Sep 17 00:00:00 2001 From: Anton Nadezhdin Date: Tue, 20 May 2025 10:42:16 +0200 Subject: [PATCH 125/497] ice/ptp: fix crosstimestamp reporting Set use_nsecs=true as timestamp is reported in ns. Lack of this result in smaller timestamp error window which cause error during phc2sys execution on E825 NICs: phc2sys[1768.256]: ioctl PTP_SYS_OFFSET_PRECISE: Invalid argument This problem was introduced in the cited commit which omitted setting use_nsecs to true when converting the ice driver to use convert_base_to_cs(). Testing hints (ethX is PF netdev): phc2sys -s ethX -c CLOCK_REALTIME -O 37 -m phc2sys[1769.256]: CLOCK_REALTIME phc offset -5 s0 freq -0 delay 0 Fixes: d4bea547ebb57 ("ice/ptp: Remove convert_art_to_tsc()") Signed-off-by: Anton Nadezhdin Reviewed-by: Aleksandr Loktionov Reviewed-by: Arkadiusz Kubalewski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index b79a148ed0f2..55cad824c5b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2299,6 +2299,7 @@ static int ice_capture_crosststamp(ktime_t *device, ts = ((u64)ts_hi << 32) | ts_lo; system->cycles = ts; system->cs_id = CSID_X86_ART; + system->use_nsecs = true; /* Read Device source clock time */ ts_lo = rd32(hw, cfg->dev_time_l[tmr_idx]); From b4a8085ceefb7bbb12c2b71c55e71fc946c6929f Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Tue, 3 Jun 2025 16:34:01 +0000 Subject: [PATCH 126/497] e1000: Move cancel_work_sync to avoid deadlock Previously, e1000_down called cancel_work_sync for the e1000 reset task (via e1000_down_and_stop), which takes RTNL. As reported by users and syzbot, a deadlock is possible in the following scenario: CPU 0: - RTNL is held - e1000_close - e1000_down - cancel_work_sync (cancel / wait for e1000_reset_task()) CPU 1: - process_one_work - e1000_reset_task - take RTNL To remedy this, avoid calling cancel_work_sync from e1000_down (e1000_reset_task does nothing if the device is down anyway). Instead, call cancel_work_sync for e1000_reset_task when the device is being removed. Fixes: e400c7444d84 ("e1000: Hold RTNL when e1000_down can be called") Reported-by: syzbot+846bb38dc67fe62cc733@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/683837bf.a00a0220.52848.0003.GAE@google.com/ Reported-by: John Closes: https://lore.kernel.org/netdev/CAP=Rh=OEsn4y_2LvkO3UtDWurKcGPnZ_NPSXK=FbgygNXL37Sw@mail.gmail.com/ Signed-off-by: Joe Damato Acked-by: Stanislav Fomichev Acked-by: Jacob Keller Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000/e1000_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3f089c3d47b2..d8595e84326d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -477,10 +477,6 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) cancel_delayed_work_sync(&adapter->phy_info_task); cancel_delayed_work_sync(&adapter->fifo_stall_task); - - /* Only kill reset task if adapter is not resetting */ - if (!test_bit(__E1000_RESETTING, &adapter->flags)) - cancel_work_sync(&adapter->reset_task); } void e1000_down(struct e1000_adapter *adapter) @@ -1266,6 +1262,10 @@ static void e1000_remove(struct pci_dev *pdev) unregister_netdev(netdev); + /* Only kill reset task if adapter is not resetting */ + if (!test_bit(__E1000_RESETTING, &adapter->flags)) + cancel_work_sync(&adapter->reset_task); + e1000_phy_hw_reset(hw); kfree(adapter->tx_ring); From ac0b8b327a5677dc6fecdf353d808161525b1ff0 Mon Sep 17 00:00:00 2001 From: Penglei Jiang Date: Tue, 10 Jun 2025 10:18:01 -0700 Subject: [PATCH 127/497] io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo() syzbot reports: BUG: KASAN: slab-use-after-free in getrusage+0x1109/0x1a60 Read of size 8 at addr ffff88810de2d2c8 by task a.out/304 CPU: 0 UID: 0 PID: 304 Comm: a.out Not tainted 6.16.0-rc1 #1 PREEMPT(voluntary) Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Call Trace: dump_stack_lvl+0x53/0x70 print_report+0xd0/0x670 ? __pfx__raw_spin_lock_irqsave+0x10/0x10 ? getrusage+0x1109/0x1a60 kasan_report+0xce/0x100 ? getrusage+0x1109/0x1a60 getrusage+0x1109/0x1a60 ? __pfx_getrusage+0x10/0x10 __io_uring_show_fdinfo+0x9fe/0x1790 ? ksys_read+0xf7/0x1c0 ? do_syscall_64+0xa4/0x260 ? vsnprintf+0x591/0x1100 ? __pfx___io_uring_show_fdinfo+0x10/0x10 ? __pfx_vsnprintf+0x10/0x10 ? mutex_trylock+0xcf/0x130 ? __pfx_mutex_trylock+0x10/0x10 ? __pfx_show_fd_locks+0x10/0x10 ? io_uring_show_fdinfo+0x57/0x80 io_uring_show_fdinfo+0x57/0x80 seq_show+0x38c/0x690 seq_read_iter+0x3f7/0x1180 ? inode_set_ctime_current+0x160/0x4b0 seq_read+0x271/0x3e0 ? __pfx_seq_read+0x10/0x10 ? __pfx__raw_spin_lock+0x10/0x10 ? __mark_inode_dirty+0x402/0x810 ? selinux_file_permission+0x368/0x500 ? file_update_time+0x10f/0x160 vfs_read+0x177/0xa40 ? __pfx___handle_mm_fault+0x10/0x10 ? __pfx_vfs_read+0x10/0x10 ? mutex_lock+0x81/0xe0 ? __pfx_mutex_lock+0x10/0x10 ? fdget_pos+0x24d/0x4b0 ksys_read+0xf7/0x1c0 ? __pfx_ksys_read+0x10/0x10 ? do_user_addr_fault+0x43b/0x9c0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f0f74170fc9 Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 8 RSP: 002b:00007fffece049e8 EFLAGS: 00000206 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f0f74170fc9 RDX: 0000000000001000 RSI: 00007fffece049f0 RDI: 0000000000000004 RBP: 00007fffece05ad0 R08: 0000000000000000 R09: 00007fffece04d90 R10: 0000000000000000 R11: 0000000000000206 R12: 00005651720a1100 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 298: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_slab_alloc+0x6e/0x70 kmem_cache_alloc_node_noprof+0xe8/0x330 copy_process+0x376/0x5e00 create_io_thread+0xab/0xf0 io_sq_offload_create+0x9ed/0xf20 io_uring_setup+0x12b0/0x1cc0 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 22: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0x37/0x50 kmem_cache_free+0xc4/0x360 rcu_core+0x5ff/0x19f0 handle_softirqs+0x18c/0x530 run_ksoftirqd+0x20/0x30 smpboot_thread_fn+0x287/0x6c0 kthread+0x30d/0x630 ret_from_fork+0xef/0x1a0 ret_from_fork_asm+0x1a/0x30 Last potentially related work creation: kasan_save_stack+0x33/0x60 kasan_record_aux_stack+0x8c/0xa0 __call_rcu_common.constprop.0+0x68/0x940 __schedule+0xff2/0x2930 __cond_resched+0x4c/0x80 mutex_lock+0x5c/0xe0 io_uring_del_tctx_node+0xe1/0x2b0 io_uring_clean_tctx+0xb7/0x160 io_uring_cancel_generic+0x34e/0x760 do_exit+0x240/0x2350 do_group_exit+0xab/0x220 __x64_sys_exit_group+0x39/0x40 x64_sys_call+0x1243/0x1840 do_syscall_64+0xa4/0x260 entry_SYSCALL_64_after_hwframe+0x77/0x7f The buggy address belongs to the object at ffff88810de2cb00 which belongs to the cache task_struct of size 3712 The buggy address is located 1992 bytes inside of freed 3712-byte region [ffff88810de2cb00, ffff88810de2d980) which is caused by the task_struct pointed to by sq->thread being released while it is being used in the function __io_uring_show_fdinfo(). Holding ctx->uring_lock does not prevent ehre relase or exit of sq->thread. Fix this by assigning and looking up ->thread under RCU, and grabbing a reference to the task_struct. This ensures that it cannot get released while fdinfo is using it. Reported-by: syzbot+531502bbbe51d2f769f4@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/682b06a5.a70a0220.3849cf.00b3.GAE@google.com Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") Signed-off-by: Penglei Jiang Link: https://lore.kernel.org/r/20250610171801.70960-1-superman.xpt@gmail.com [axboe: massage commit message] Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 12 ++++++++++-- io_uring/sqpoll.c | 9 ++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index e9355276ab5d..9798d6fb4ec7 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -141,18 +141,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sq = ctx->sq_data; + struct task_struct *tsk; + rcu_read_lock(); + tsk = rcu_dereference(sq->thread); /* * sq->thread might be NULL if we raced with the sqpoll * thread termination. */ - if (sq->thread) { + if (tsk) { + get_task_struct(tsk); + rcu_read_unlock(); + getrusage(tsk, RUSAGE_SELF, &sq_usage); + put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - getrusage(sq->thread, RUSAGE_SELF, &sq_usage); sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 + sq_usage.ru_stime.tv_usec); sq_work_time = sq->work_time; + } else { + rcu_read_unlock(); } } diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 03c699493b5a..0625a421626f 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -270,7 +270,8 @@ static int io_sq_thread(void *data) /* offload context creation failed, just exit */ if (!current->io_uring) { mutex_lock(&sqd->lock); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); mutex_unlock(&sqd->lock); goto err_out; } @@ -379,7 +380,8 @@ static int io_sq_thread(void *data) io_sq_tw(&retry_list, UINT_MAX); io_uring_cancel_generic(true, sqd); - sqd->thread = NULL; + rcu_assign_pointer(sqd->thread, NULL); + put_task_struct(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); io_run_task_work(); @@ -495,9 +497,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, ret = -EINVAL; goto err; } - - if (task_to_put) - put_task_struct(task_to_put); return 0; err_sqpoll: complete(&ctx->sq_data->exited); From aef17cb3d3c43854002956f24c24ec8e1a0e3546 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 10 Jun 2025 10:22:15 -0700 Subject: [PATCH 128/497] Revert "mm/damon/Kconfig: enable CONFIG_DAMON by default" This reverts commit 28615e6eed152f2fda5486680090b74aeed7b554. No, we don't make random features default to being on. Reported-by: Geert Uytterhoeven Cc: Andrew Morton Cc: SeongJae Park Signed-off-by: Linus Torvalds --- mm/damon/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig index 551745df011b..c93d0c56b963 100644 --- a/mm/damon/Kconfig +++ b/mm/damon/Kconfig @@ -4,7 +4,6 @@ menu "Data Access Monitoring" config DAMON bool "DAMON: Data Access Monitoring Framework" - default y help This builds a framework that allows kernel subsystems to monitor access frequency of each memory region. The information can be useful From c393befa14ab26596fb86d702566d648832dae06 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jun 2025 20:32:26 -0700 Subject: [PATCH 129/497] driver core: faux: Suppress bind attributes faux_device_create() is almost a suitable candidate to replace platform_driver_probe() if not for the fact that faux_device_create() supports dynamic attach/detach of the driver. Drop the bind attributes with the expectation that simple faux devices can always assume that the device is permanently bound at create, and only unbound at 'destroy'. The acpi-einj driver depends on static bind. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250607033228.1475625-2-dan.j.williams@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/base/faux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/faux.c b/drivers/base/faux.c index 9054d346bd7f..934da77ca48b 100644 --- a/drivers/base/faux.c +++ b/drivers/base/faux.c @@ -86,6 +86,7 @@ static struct device_driver faux_driver = { .name = "faux_driver", .bus = &faux_bus_type, .probe_type = PROBE_FORCE_SYNCHRONOUS, + .suppress_bind_attrs = true, }; static void faux_device_release(struct device *dev) From ff53a6e247285687df1a71d8ee5c457939792c13 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jun 2025 20:32:27 -0700 Subject: [PATCH 130/497] driver core: faux: Quiet probe failures The acpi-einj conversion to faux_device_create() leads to a noisy error message when the error injection facility is disabled. Quiet the error as CXL error injection via ACPI expects the module to stay loaded even if the error injection facility is disabled. This situation arose because CXL knows proper kernel named objects to trigger errors against, but acpi-einj knows how to perform the error injection. The injection mechanism is shared with non-CXL use cases. The result is CXL now has a module dependency on einj-core.ko, and init/probe failures are handled at runtime. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250607033228.1475625-3-dan.j.williams@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/base/faux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/faux.c b/drivers/base/faux.c index 934da77ca48b..f5fbda0a9a44 100644 --- a/drivers/base/faux.c +++ b/drivers/base/faux.c @@ -170,7 +170,7 @@ struct faux_device *faux_device_create_with_groups(const char *name, * successful is almost impossible to determine by the caller. */ if (!dev->driver) { - dev_err(dev, "probe did not succeed, tearing down the device\n"); + dev_dbg(dev, "probe did not succeed, tearing down the device\n"); faux_device_destroy(faux_dev); faux_dev = NULL; } From 162457f5853ce3348e7956666916f5e5e31be51f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 6 Jun 2025 20:32:28 -0700 Subject: [PATCH 131/497] ACPI: APEI: EINJ: Do not fail einj_init() on faux_device_create() failure CXL has a symbol dependency on einj_core.ko, so if einj_init() fails then cxl_core.ko fails to load. Prior to the faux_device_create() conversion, einj_probe() failures were tracked by the einj_initialized flag without failing einj_init(). Revert to that behavior and always succeed einj_init() given there is no way, and no pressing need, to discern faux device-create vs device-probe failures. This situation arose because CXL knows proper kernel named objects to trigger errors against, but acpi-einj knows how to perform the error injection. The injection mechanism is shared with non-CXL use cases. The result is CXL now has a module dependency on einj-core.ko, and init/probe failures are handled at runtime. Fixes: 6cb9441bfe8d ("ACPI: APEI: EINJ: Transition to the faux device interface") Signed-off-by: Dan Williams Reviewed-by: Ben Cheatham Acked-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250607033228.1475625-4-dan.j.williams@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/einj-core.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c index fea11a35eea3..9b041415a9d0 100644 --- a/drivers/acpi/apei/einj-core.c +++ b/drivers/acpi/apei/einj-core.c @@ -883,19 +883,16 @@ static int __init einj_init(void) } einj_dev = faux_device_create("acpi-einj", NULL, &einj_device_ops); - if (!einj_dev) - return -ENODEV; - einj_initialized = true; + if (einj_dev) + einj_initialized = true; return 0; } static void __exit einj_exit(void) { - if (einj_initialized) - faux_device_destroy(einj_dev); - + faux_device_destroy(einj_dev); } module_init(einj_init); From 5b2d595efbfc9c46823bdb9ef11e1f9fa46adf9d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 6 Jun 2025 11:05:05 +0900 Subject: [PATCH 132/497] rust: time: Fix compile error in impl_has_hr_timer macro Fix a compile error in the `impl_has_hr_timer!` macro as follows: error[E0599]: no method named cast_mut found for raw pointer *mut Foo in the current scope The `container_of!` macro already returns a mutable pointer when used in a `*mut T` context so the `.cast_mut()` method is not available. [ We missed this one because there is no caller yet and it is a macro. - Miguel ] Fixes: 74d6a606c2b3 ("rust: retain pointer mut-ness in `container_of!`") Signed-off-by: FUJITA Tomonori Reviewed-by: Benno Lossin Acked-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250606020505.3186533-1-fujita.tomonori@gmail.com Signed-off-by: Miguel Ojeda --- rust/kernel/time/hrtimer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/time/hrtimer.rs b/rust/kernel/time/hrtimer.rs index 9df3dcd2fa39..36e1290cd079 100644 --- a/rust/kernel/time/hrtimer.rs +++ b/rust/kernel/time/hrtimer.rs @@ -517,7 +517,7 @@ unsafe fn timer_container_of( ) -> *mut Self { // SAFETY: As per the safety requirement of this function, `ptr` // is pointing inside a `$timer_type`. - unsafe { ::kernel::container_of!(ptr, $timer_type, $field).cast_mut() } + unsafe { ::kernel::container_of!(ptr, $timer_type, $field) } } } } From 2f76d269073bdb2971b253ef87d1f96f1a94c50e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 9 Jun 2025 08:42:06 +0200 Subject: [PATCH 133/497] ACPI: PAD: Update arguments of mwait_idle_with_hints() Commit a17b37a3f416 ("x86/idle: Change arguments of mwait_idle_with_hints() to u32") changed the type of arguments of mwait_idle_with_hints() from unsigned long to u32. Change the type of variables in the call to mwait_idle_with_hints() to unsigned int to follow the change. Signed-off-by: Uros Bizjak Link: https://patch.msgid.link/20250609064235.49146-1-ubizjak@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_pad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 6f8bbe1247a5..c9a0bcaba2e4 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -33,7 +33,7 @@ static DEFINE_MUTEX(isolated_cpus_lock); static DEFINE_MUTEX(round_robin_lock); -static unsigned long power_saving_mwait_eax; +static unsigned int power_saving_mwait_eax; static unsigned char tsc_detected_unstable; static unsigned char tsc_marked_unstable; From 15eece6c5b05e5f9db0711978c3e3b7f1a2cfe12 Mon Sep 17 00:00:00 2001 From: Yunhui Cui Date: Wed, 4 Jun 2025 10:30:36 +0800 Subject: [PATCH 134/497] ACPI: CPPC: Fix NULL pointer dereference when nosmp is used With nosmp in cmdline, other CPUs are not brought up, leaving their cpc_desc_ptr NULL. CPU0's iteration via for_each_possible_cpu() dereferences these NULL pointers, causing panic. Panic backtrace: [ 0.401123] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000b8 ... [ 0.403255] [] cppc_allow_fast_switch+0x6a/0xd4 ... Kernel panic - not syncing: Attempted to kill init! Fixes: 3cc30dd00a58 ("cpufreq: CPPC: Enable fast_switch") Reported-by: Xu Lu Signed-off-by: Yunhui Cui Link: https://patch.msgid.link/20250604023036.99553-1-cuiyunhui@bytedance.com [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a9ae2fd62863..6b649031808f 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -476,7 +476,7 @@ bool cppc_allow_fast_switch(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_possible_cpu(cpu) { + for_each_present_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && From 7a0d59f6a913a2bc7680c663b8cf1e45d1bdbf26 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Fri, 30 May 2025 01:53:10 +0200 Subject: [PATCH 135/497] ACPI: EC: Ignore ECDT tables with an invalid ID string On the MSI Modern 14 C5M the ECDT table contains invalid data: UID : 00000000 GPE Number : 00 /* Invalid, 03 would be correct */ Namepath : "" /* Invalid, "\_SB.PCI0.SBRG.EC" would * be correct */ This slows down the EC access as the wrong GPE event is used for communication. Additionally the ID string is invalid. Ignore such faulty ECDT tables by verifying that the ID string has a valid format. Tested-by: glpnk@proton.me Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20250529235310.540530-1-W_Armin@gmx.de Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 6f4203716b53..75c7db8b156a 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -23,8 +23,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -2031,6 +2033,21 @@ void __init acpi_ec_ecdt_probe(void) goto out; } + if (!strstarts(ecdt_ptr->id, "\\")) { + /* + * The ECDT table on some MSI notebooks contains invalid data, together + * with an empty ID string (""). + * + * Section 5.2.15 of the ACPI specification requires the ID string to be + * a "fully qualified reference to the (...) embedded controller device", + * so this string always has to start with a backslash. + * + * By verifying this we can avoid such faulty ECDT tables in a safe way. + */ + pr_err(FW_BUG "Ignoring ECDT due to invalid ID string \"%s\"\n", ecdt_ptr->id); + goto out; + } + ec = acpi_ec_alloc(); if (!ec) goto out; From c99ad987d3e9b550e9839d5df22de97d90462e5f Mon Sep 17 00:00:00 2001 From: Wentao Guan Date: Tue, 3 Jun 2025 20:20:59 +0800 Subject: [PATCH 136/497] ACPI: resource: Use IRQ override on MACHENIKE 16P Use ACPI IRQ override on MACHENIKE laptop to make the internal keyboard work. Add a new entry to the irq1_edge_low_force_override structure, similar to the existing ones. Link: https://bbs.deepin.org.cn/zh/post/287628 Signed-off-by: Wentao Guan Link: https://patch.msgid.link/20250603122059.1072790-1-guanwentao@uniontech.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/resource.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 7d59c6c9185f..b1ab192d7a08 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -666,6 +666,13 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = { DMI_MATCH(DMI_BOARD_NAME, "GMxHGxx"), }, }, + { + /* MACHENIKE L16P/L16P */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MACHENIKE"), + DMI_MATCH(DMI_BOARD_NAME, "L16P"), + }, + }, { /* * TongFang GM5HG0A in case of the SKIKK Vanaheim relabel the From 72840238e2bcb8fb24cb35d8d1d5a822c04e62a4 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 9 Jun 2025 08:35:01 +0200 Subject: [PATCH 137/497] intel_idle: Update arguments of mwait_idle_with_hints() Commit a17b37a3f416 ("x86/idle: Change arguments of mwait_idle_with_hints() to u32") changed the type of arguments of mwait_idle_with_hints() from unsigned long to u32. Change the type of variables in the call to mwait_idle_with_hints() to unsigned int to follow the change. Signed-off-by: Uros Bizjak Reviewed-by: Artem Bityutskiy Link: https://patch.msgid.link/20250609063528.48715-1-ubizjak@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 63565814c7e5..73747d20df85 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -152,8 +152,8 @@ static __always_inline int __intel_idle(struct cpuidle_device *dev, int index, bool irqoff) { struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); - unsigned long ecx = 1*irqoff; /* break on interrupt flag */ + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1*irqoff; /* break on interrupt flag */ mwait_idle_with_hints(eax, ecx); @@ -226,9 +226,9 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - unsigned long ecx = 1; /* break on interrupt flag */ struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); + unsigned int eax = flg2MWAIT(state->flags); + unsigned int ecx = 1; /* break on interrupt flag */ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) fpu_idle_fpregs(); From 0b3bc018e86afdc0cbfef61328c63d5c08f8b370 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Sat, 7 Jun 2025 01:07:37 +1200 Subject: [PATCH 138/497] x86/virt/tdx: Avoid indirect calls to TDX assembly functions Two 'static inline' TDX helper functions (sc_retry() and sc_retry_prerr()) take function pointer arguments which refer to assembly functions. Normally, the compiler inlines the TDX helper, realizes that the function pointer targets are completely static -- thus can be resolved at compile time -- and generates direct call instructions. But, other times (like when CONFIG_CC_OPTIMIZE_FOR_SIZE=y), the compiler declines to inline the helpers and will instead generate indirect call instructions. Indirect calls to assembly functions require special annotation (for various Control Flow Integrity mechanisms). But TDX assembly functions lack the special annotations and can only be called directly. Annotate both the helpers as '__always_inline' to prod the compiler into maintaining the direct calls. There is no guarantee here, but Peter has volunteered to report the compiler bug if this assumption ever breaks[1]. Fixes: 1e66a7e27539 ("x86/virt/tdx: Handle SEAMCALL no entropy error in common code") Fixes: df01f5ae07dd ("x86/virt/tdx: Add SEAMCALL error printing for module initialization") Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/20250605145914.GW39944@noisy.programming.kicks-ass.net/ [1] Link: https://lore.kernel.org/all/20250606130737.30713-1-kai.huang%40intel.com --- arch/x86/include/asm/tdx.h | 2 +- arch/x86/virt/vmx/tdx/tdx.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 8b19294600c4..7ddef3a69866 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -106,7 +106,7 @@ void tdx_init(void); typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); -static inline u64 sc_retry(sc_func_t func, u64 fn, +static __always_inline u64 sc_retry(sc_func_t func, u64 fn, struct tdx_module_args *args) { int retry = RDRAND_RETRY_LOOPS; diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c index 2457d13c3f9e..c7a9a087ccaf 100644 --- a/arch/x86/virt/vmx/tdx/tdx.c +++ b/arch/x86/virt/vmx/tdx/tdx.c @@ -75,8 +75,9 @@ static inline void seamcall_err_ret(u64 fn, u64 err, args->r9, args->r10, args->r11); } -static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func, - u64 fn, struct tdx_module_args *args) +static __always_inline int sc_retry_prerr(sc_func_t func, + sc_err_func_t err_func, + u64 fn, struct tdx_module_args *args) { u64 sret = sc_retry(func, fn, args); From 9cf1e25053c269d64b9e9fa25e8697d6d58028d4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 10 Jun 2025 10:54:37 -0700 Subject: [PATCH 139/497] MAINTAINERS: Add myself as bpf networking reviewer I've been focusing on networking BPF bits lately, add myself as a reviewer. Signed-off-by: Stanislav Fomichev Acked-by: KP Singh Acked-by: Daniel Borkmann Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20250610175442.2138504-1-stfomichev@gmail.com Signed-off-by: Alexei Starovoitov --- .mailmap | 1 + MAINTAINERS | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.mailmap b/.mailmap index 77bb62564ef7..7641338b8d3f 100644 --- a/.mailmap +++ b/.mailmap @@ -712,6 +712,7 @@ Srinivas Ramana Sriram R Sriram Yagnaraman Stanislav Fomichev +Stanislav Fomichev Stefan Wahren Stéphane Witzmann Stephen Hemminger diff --git a/MAINTAINERS b/MAINTAINERS index 8d314e169486..8e109f4436eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4555,6 +4555,7 @@ BPF [NETWORKING] (tcx & tc BPF, sock_addr) M: Martin KaFai Lau M: Daniel Borkmann R: John Fastabend +R: Stanislav Fomichev L: bpf@vger.kernel.org L: netdev@vger.kernel.org S: Maintained @@ -26948,6 +26949,7 @@ M: David S. Miller M: Jakub Kicinski M: Jesper Dangaard Brouer M: John Fastabend +R: Stanislav Fomichev L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported @@ -26969,6 +26971,7 @@ M: Björn Töpel M: Magnus Karlsson M: Maciej Fijalkowski R: Jonathan Lemon +R: Stanislav Fomichev L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Maintained From dc9c67820f81ee0d34f9095195228fcb828315ff Mon Sep 17 00:00:00 2001 From: Lucas Sanchez Sagrado Date: Mon, 9 Jun 2025 16:55:36 +0200 Subject: [PATCH 140/497] net: usb: r8152: Add device ID for TP-Link UE200 The TP-Link UE200 is a RTL8152B based USB 2.0 Fast Ethernet adapter. This patch adds its device ID. It has been tested on Ubuntu 22.04.5. Signed-off-by: Lucas Sanchez Sagrado Reviewed-by: Subbaraya Sundeep Link: https://patch.msgid.link/20250609145536.26648-1-lucsansag@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/r8152.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d6589b24c68d..44cba7acfe7d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -10054,6 +10054,7 @@ static const struct usb_device_id rtl8152_table[] = { { USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041) }, { USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff) }, { USB_DEVICE(VENDOR_ID_TPLINK, 0x0601) }, + { USB_DEVICE(VENDOR_ID_TPLINK, 0x0602) }, { USB_DEVICE(VENDOR_ID_DLINK, 0xb301) }, { USB_DEVICE(VENDOR_ID_DELL, 0xb097) }, { USB_DEVICE(VENDOR_ID_ASUS, 0x1976) }, From 2660a544fdc0940bba15f70508a46cf9a6491230 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 9 Jun 2025 19:08:03 +0200 Subject: [PATCH 141/497] net: Fix TOCTOU issue in sk_is_readable() sk->sk_prot->sock_is_readable is a valid function pointer when sk resides in a sockmap. After the last sk_psock_put() (which usually happens when socket is removed from sockmap), sk->sk_prot gets restored and sk->sk_prot->sock_is_readable becomes NULL. This makes sk_is_readable() racy, if the value of sk->sk_prot is reloaded after the initial check. Which in turn may lead to a null pointer dereference. Ensure the function pointer does not turn NULL after the check. Fixes: 8934ce2fd081 ("bpf: sockmap redirect ingress support") Suggested-by: Jakub Sitnicki Signed-off-by: Michal Luczaj Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250609-skisreadable-toctou-v1-1-d0dfb2d62c37@rbox.co Signed-off-by: Jakub Kicinski --- include/net/sock.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 92e7c1aae3cc..4c37015b7cf7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -3010,8 +3010,11 @@ int sock_ioctl_inout(struct sock *sk, unsigned int cmd, int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); static inline bool sk_is_readable(struct sock *sk) { - if (sk->sk_prot->sock_is_readable) - return sk->sk_prot->sock_is_readable(sk); + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->sock_is_readable) + return prot->sock_is_readable(sk); + return false; } #endif /* _SOCK_H */ From c85bf1975108d2e2431c11d1cb7e95aca587dfbe Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Mon, 9 Jun 2025 11:24:20 -0700 Subject: [PATCH 142/497] netconsole: fix appending sysdata when sysdata_fields == SYSDATA_RELEASE Before appending sysdata, prepare_extradata() checks if any feature is enabled in sysdata_fields (and exits early if none is enabled). When SYSDATA_RELEASE was introduced, we missed adding it to the list of features being checked against sysdata_fields in prepare_extradata(). The result was that, if only SYSDATA_RELEASE is enabled in sysdata_fields, we incorreclty exit early and fail to append the release. Instead of checking specific bits in sysdata_fields, check if sysdata_fields has ALL bit zeroed and exit early if true. This fixes case when only SYSDATA_RELEASE enabled and makes the code more general / less error prone in future feature implementation. Signed-off-by: Gustavo Luiz Duarte Reviewed-by: Breno Leitao Fixes: cfcc9239e78a ("netconsole: append release to sysdata") Link: https://patch.msgid.link/20250609-netconsole-fix-v1-1-17543611ae31@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4289ccd3e41b..176935a8645f 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1252,7 +1252,6 @@ static int sysdata_append_release(struct netconsole_target *nt, int offset) */ static int prepare_extradata(struct netconsole_target *nt) { - u32 fields = SYSDATA_CPU_NR | SYSDATA_TASKNAME; int extradata_len; /* userdata was appended when configfs write helper was called @@ -1260,7 +1259,7 @@ static int prepare_extradata(struct netconsole_target *nt) */ extradata_len = nt->userdata_length; - if (!(nt->sysdata_fields & fields)) + if (!nt->sysdata_fields) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) From f478d68b653323b691280b40fbd3b8ca1ac75aa2 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 9 Jun 2025 22:40:35 +0200 Subject: [PATCH 143/497] net: airoha: Enable RX queues 16-31 Fix RX_DONE_INT_MASK definition in order to enable RX queues 16-31. Fixes: f252493e18353 ("net: airoha: Enable multiple IRQ lines support in airoha_eth driver.") Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250609-aioha-fix-rx-queue-mask-v1-1-f33706a06fa2@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_regs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 04187eb40ec6..150c85995cc1 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -614,8 +614,9 @@ RX19_DONE_INT_MASK | RX18_DONE_INT_MASK | \ RX17_DONE_INT_MASK | RX16_DONE_INT_MASK) -#define RX_DONE_INT_MASK (RX_DONE_HIGH_INT_MASK | RX_DONE_LOW_INT_MASK) #define RX_DONE_HIGH_OFFSET fls(RX_DONE_HIGH_INT_MASK) +#define RX_DONE_INT_MASK \ + ((RX_DONE_HIGH_INT_MASK << RX_DONE_HIGH_OFFSET) | RX_DONE_LOW_INT_MASK) #define INT_RX2_MASK(_n) \ ((RX_NO_CPU_DSCP_HIGH_INT_MASK & (_n)) | \ From 40a98e702b528c631094f2e524d309faf33dc774 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 10 Jun 2025 12:16:00 -0700 Subject: [PATCH 144/497] crypto: hkdf - move to late_initcall The HKDF self-tests depend on the HMAC algorithms being registered. HMAC is now registered at module_init, which put it at the same level as HKDF. Move HKDF to late_initcall so that it runs afterwards. Fixes: ef93f1562803 ("Revert "crypto: run initcalls for generic implementations earlier"") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/hkdf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/hkdf.c b/crypto/hkdf.c index f24c2a8d4df9..82d1b32ca6ce 100644 --- a/crypto/hkdf.c +++ b/crypto/hkdf.c @@ -566,7 +566,7 @@ static int __init crypto_hkdf_module_init(void) static void __exit crypto_hkdf_module_exit(void) {} -module_init(crypto_hkdf_module_init); +late_initcall(crypto_hkdf_module_init); module_exit(crypto_hkdf_module_exit); MODULE_LICENSE("GPL"); From b8d3291d3185b5c69ba36362ae1a370f06a33e5c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 10 Jun 2025 12:40:38 -0700 Subject: [PATCH 145/497] drm/sitronix: st7571-i2c: Select VIDEOMODE_HELPERS This driver requires of_get_display_timing() from CONFIG_VIDEOMODE_HELPERS but does not select it. If no other driver selects it, there will be a failure from the linker if the driver is built in or modpost if it is a module. ERROR: modpost: "of_get_display_timing" [drivers/gpu/drm/sitronix/st7571-i2c.ko] undefined! Select CONFIG_VIDEOMODE_HELPERS to resolve the build failure. Fixes: 4b35f0f41ee2 ("drm/st7571-i2c: add support for Sitronix ST7571 LCD controller") Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250610-drm-st7571-i2c-select-videomode-helpers-v1-1-d30b50ff6e64@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sitronix/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/sitronix/Kconfig b/drivers/gpu/drm/sitronix/Kconfig index c069d0d41775..741d1bb4b83f 100644 --- a/drivers/gpu/drm/sitronix/Kconfig +++ b/drivers/gpu/drm/sitronix/Kconfig @@ -5,6 +5,7 @@ config DRM_ST7571_I2C select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER select REGMAP_I2C + select VIDEOMODE_HELPERS help DRM driver for Sitronix ST7571 panels controlled over I2C. From d9816ec74e6d6aa29219d010bba3f780ba1d9d75 Mon Sep 17 00:00:00 2001 From: Carlos Fernandez Date: Mon, 9 Jun 2025 09:26:26 +0200 Subject: [PATCH 146/497] macsec: MACsec SCI assignment for ES = 0 According to 802.1AE standard, when ES and SC flags in TCI are zero, used SCI should be the current active SC_RX. Current code uses the header MAC address. Without this patch, when ES flag is 0 (using a bridge or switch), header MAC will not fit the SCI and MACSec frames will be discarted. In order to test this issue, MACsec link should be stablished between two interfaces, setting SC and ES flags to zero and a port identifier different than one. For example, using ip macsec tools: ip link add link $ETH0 macsec0 type macsec port 11 send_sci off end_station off ip macsec add macsec0 tx sa 0 pn 2 on key 01 $ETH1_KEY ip macsec add macsec0 rx port 11 address $ETH1_MAC ip macsec add macsec0 rx port 11 address $ETH1_MAC sa 0 pn 2 on key 02 ip link set dev macsec0 up ip link add link $ETH1 macsec1 type macsec port 11 send_sci off end_station off ip macsec add macsec1 tx sa 0 pn 2 on key 01 $ETH0_KEY ip macsec add macsec1 rx port 11 address $ETH0_MAC ip macsec add macsec1 rx port 11 address $ETH0_MAC sa 0 pn 2 on key 02 ip link set dev macsec1 up Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Co-developed-by: Andreu Montiel Signed-off-by: Andreu Montiel Signed-off-by: Carlos Fernandez Reviewed-by: Subbaraya Sundeep Signed-off-by: David S. Miller --- drivers/net/macsec.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3d315e30ee47..7edbe76b5455 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -247,15 +247,39 @@ static sci_t make_sci(const u8 *addr, __be16 port) return sci; } -static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present) +static sci_t macsec_active_sci(struct macsec_secy *secy) { - sci_t sci; + struct macsec_rx_sc *rx_sc = rcu_dereference_bh(secy->rx_sc); - if (sci_present) + /* Case single RX SC */ + if (rx_sc && !rcu_dereference_bh(rx_sc->next)) + return (rx_sc->active) ? rx_sc->sci : 0; + /* Case no RX SC or multiple */ + else + return 0; +} + +static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present, + struct macsec_rxh_data *rxd) +{ + struct macsec_dev *macsec; + sci_t sci = 0; + + /* SC = 1 */ + if (sci_present) { memcpy(&sci, hdr->secure_channel_id, sizeof(hdr->secure_channel_id)); - else + /* SC = 0; ES = 0 */ + } else if ((!(hdr->tci_an & (MACSEC_TCI_ES | MACSEC_TCI_SC))) && + (list_is_singular(&rxd->secys))) { + /* Only one SECY should exist on this scenario */ + macsec = list_first_or_null_rcu(&rxd->secys, struct macsec_dev, + secys); + if (macsec) + return macsec_active_sci(&macsec->secy); + } else { sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES); + } return sci; } @@ -1109,7 +1133,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; - sci_t sci; + sci_t sci = 0; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; @@ -1156,11 +1180,14 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; - sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci); rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); + sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci, rxd); + if (!sci) + goto drop_nosc; + list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); @@ -1283,6 +1310,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); +drop_nosc: rcu_read_unlock(); drop_direct: kfree_skb(skb); From 1dbf30fdb5e57fb2c39f17f35f2b544d5de34397 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 3 Jun 2025 14:14:41 +0300 Subject: [PATCH 147/497] x86/mm/pat: don't collapse pages without PSE set Collapsing pages to a leaf PMD or PUD should be done only if X86_FEATURE_PSE is available, which is not the case when running e.g. as a Xen PV guest. Fixes: 41d88484c71c ("x86/mm/pat: restore large ROX pages after fragmentation") Signed-off-by: Juergen Gross Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250528123557.12847-3-jgross@suse.com --- arch/x86/mm/pat/set_memory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 46edc11726b7..8834c76f91c9 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, pgprot_t pgprot; int i = 0; + if (!cpu_feature_enabled(X86_FEATURE_PSE)) + return 0; + addr &= PMD_MASK; pte = pte_offset_kernel(pmd, addr); first = *pte; From 47410d839fcda6890cb82828f874f97710982f24 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 3 Jun 2025 14:14:42 +0300 Subject: [PATCH 148/497] x86/Kconfig: only enable ROX cache in execmem when STRICT_MODULE_RWX is set Currently ROX cache in execmem is enabled regardless of STRICT_MODULE_RWX setting. This breaks an assumption that module memory is writable when STRICT_MODULE_RWX is disabled, for instance for kernel debuggin. Only enable ROX cache in execmem when STRICT_MODULE_RWX is set to restore the original behaviour of module text permissions. Fixes: 64f6a4e10c05 ("x86: re-enable EXECMEM_ROX support") Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-3-rppt@kernel.org --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 340e5468980e..71019b3b54ea 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -89,7 +89,7 @@ config X86 select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE - select ARCH_HAS_EXECMEM_ROX if X86_64 + select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL From 0b0cae7119a0ec9449d7261b5e672a5fed765068 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 3 Jun 2025 14:14:43 +0300 Subject: [PATCH 149/497] x86/its: move its_pages array to struct mod_arch_specific The of pages with ITS thunks allocated for modules are tracked by an array in 'struct module'. Since this is very architecture specific data structure, move it to 'struct mod_arch_specific'. No functional changes. Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-4-rppt@kernel.org --- arch/x86/include/asm/module.h | 8 ++++++++ arch/x86/kernel/alternative.c | 19 ++++++++++--------- include/linux/module.h | 5 ----- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h index e988bac0a4a1..3c2de4ce3b10 100644 --- a/arch/x86/include/asm/module.h +++ b/arch/x86/include/asm/module.h @@ -5,12 +5,20 @@ #include #include +struct its_array { +#ifdef CONFIG_MITIGATION_ITS + void **pages; + int num; +#endif +}; + struct mod_arch_specific { #ifdef CONFIG_UNWINDER_ORC unsigned int num_orcs; int *orc_unwind_ip; struct orc_entry *orc_unwind; #endif + struct its_array its_pages; }; #endif /* _ASM_X86_MODULE_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ecfe7b497cad..b50fe6ce4655 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -173,8 +173,8 @@ void its_fini_mod(struct module *mod) its_page = NULL; mutex_unlock(&text_mutex); - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; + for (int i = 0; i < mod->arch.its_pages.num; i++) { + void *page = mod->arch.its_pages.pages[i]; execmem_restore_rox(page, PAGE_SIZE); } } @@ -184,11 +184,11 @@ void its_free_mod(struct module *mod) if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) return; - for (int i = 0; i < mod->its_num_pages; i++) { - void *page = mod->its_page_array[i]; + for (int i = 0; i < mod->arch.its_pages.num; i++) { + void *page = mod->arch.its_pages.pages[i]; execmem_free(page); } - kfree(mod->its_page_array); + kfree(mod->arch.its_pages.pages); } #endif /* CONFIG_MODULES */ @@ -201,14 +201,15 @@ static void *its_alloc(void) #ifdef CONFIG_MODULES if (its_mod) { - void *tmp = krealloc(its_mod->its_page_array, - (its_mod->its_num_pages+1) * sizeof(void *), + struct its_array *pages = &its_mod->arch.its_pages; + void *tmp = krealloc(pages->pages, + (pages->num+1) * sizeof(void *), GFP_KERNEL); if (!tmp) return NULL; - its_mod->its_page_array = tmp; - its_mod->its_page_array[its_mod->its_num_pages++] = page; + pages->pages = tmp; + pages->pages[pages->num++] = page; execmem_make_temp_rw(page, PAGE_SIZE); } diff --git a/include/linux/module.h b/include/linux/module.h index 92e1420fccdf..5faa1fb1f4b4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -586,11 +586,6 @@ struct module { atomic_t refcnt; #endif -#ifdef CONFIG_MITIGATION_ITS - int its_num_pages; - void **its_page_array; -#endif - #ifdef CONFIG_CONSTRUCTORS /* Constructor functions. */ ctor_fn_t *ctors; From a82b26451de126a5ae130361081986bc459afe9b Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Tue, 3 Jun 2025 14:14:44 +0300 Subject: [PATCH 150/497] x86/its: explicitly manage permissions for ITS pages execmem_alloc() sets permissions differently depending on the kernel configuration, CPU support for PSE and whether a page is allocated before or after mark_rodata_ro(). Add tracking for pages allocated for ITS when patching the core kernel and make sure the permissions for ITS pages are explicitly managed for both kernel and module allocations. Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") Signed-off-by: Peter Zijlstra (Intel) Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nikolay Borisov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-5-rppt@kernel.org --- arch/x86/kernel/alternative.c | 74 ++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b50fe6ce4655..6455f7f751b3 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -116,6 +116,24 @@ static struct module *its_mod; #endif static void *its_page; static unsigned int its_offset; +struct its_array its_pages; + +static void *__its_alloc(struct its_array *pages) +{ + void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + if (!page) + return NULL; + + void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + + pages->pages = tmp; + pages->pages[pages->num++] = page; + + return no_free_ptr(page); +} /* Initialize a thunk with the "jmp *reg; int3" instructions. */ static void *its_init_thunk(void *thunk, int reg) @@ -151,6 +169,21 @@ static void *its_init_thunk(void *thunk, int reg) return thunk + offset; } +static void its_pages_protect(struct its_array *pages) +{ + for (int i = 0; i < pages->num; i++) { + void *page = pages->pages[i]; + execmem_restore_rox(page, PAGE_SIZE); + } +} + +static void its_fini_core(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + its_pages_protect(&its_pages); + kfree(its_pages.pages); +} + #ifdef CONFIG_MODULES void its_init_mod(struct module *mod) { @@ -173,10 +206,8 @@ void its_fini_mod(struct module *mod) its_page = NULL; mutex_unlock(&text_mutex); - for (int i = 0; i < mod->arch.its_pages.num; i++) { - void *page = mod->arch.its_pages.pages[i]; - execmem_restore_rox(page, PAGE_SIZE); - } + if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + its_pages_protect(&mod->arch.its_pages); } void its_free_mod(struct module *mod) @@ -194,28 +225,23 @@ void its_free_mod(struct module *mod) static void *its_alloc(void) { - void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); + struct its_array *pages = &its_pages; + void *page; +#ifdef CONFIG_MODULE + if (its_mod) + pages = &its_mod->arch.its_pages; +#endif + + page = __its_alloc(pages); if (!page) return NULL; -#ifdef CONFIG_MODULES - if (its_mod) { - struct its_array *pages = &its_mod->arch.its_pages; - void *tmp = krealloc(pages->pages, - (pages->num+1) * sizeof(void *), - GFP_KERNEL); - if (!tmp) - return NULL; + execmem_make_temp_rw(page, PAGE_SIZE); + if (pages == &its_pages) + set_memory_x((unsigned long)page, 1); - pages->pages = tmp; - pages->pages[pages->num++] = page; - - execmem_make_temp_rw(page, PAGE_SIZE); - } -#endif /* CONFIG_MODULES */ - - return no_free_ptr(page); + return page; } static void *its_allocate_thunk(int reg) @@ -269,7 +295,9 @@ u8 *its_static_thunk(int reg) return thunk; } -#endif +#else +static inline void its_fini_core(void) {} +#endif /* CONFIG_MITIGATION_ITS */ /* * Nomenclature for variable names to simplify and clarify this code and ease @@ -2339,6 +2367,8 @@ void __init alternative_instructions(void) apply_retpolines(__retpoline_sites, __retpoline_sites_end); apply_returns(__return_sites, __return_sites_end); + its_fini_core(); + /* * Adjust all CALL instructions to point to func()-10, including * those in .altinstr_replacement. From 7cd9a11dd0c3d1dd225795ed1b5b53132888e7b5 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 3 Jun 2025 14:14:45 +0300 Subject: [PATCH 151/497] Revert "mm/execmem: Unify early execmem_cache behaviour" The commit d6d1e3e6580c ("mm/execmem: Unify early execmem_cache behaviour") changed early behaviour of execemem ROX cache to allow its usage in early x86 code that allocates text pages when CONFIG_MITGATION_ITS is enabled. The permission management of the pages allocated from execmem for ITS mitigation is now completely contained in arch/x86/kernel/alternatives.c and therefore there is no need to special case early allocations in execmem. This reverts commit d6d1e3e6580ca35071ad474381f053cbf1fb6414. Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20250603111446.2609381-6-rppt@kernel.org --- arch/x86/mm/init_32.c | 3 --- arch/x86/mm/init_64.c | 3 --- include/linux/execmem.h | 8 +------- mm/execmem.c | 40 +++------------------------------------- 4 files changed, 4 insertions(+), 50 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 607d6a2e66e2..8a34fff6ab2b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -749,8 +748,6 @@ void mark_rodata_ro(void) pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; #ifdef CONFIG_CPA_DEBUG diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee66fae9ebcc..fdb6cab524f0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -1392,8 +1391,6 @@ void mark_rodata_ro(void) (end - start) >> 10); set_memory_ro(start, (end - start) >> PAGE_SHIFT); - execmem_cache_make_ro(); - kernel_set_to_readonly = 1; /* diff --git a/include/linux/execmem.h b/include/linux/execmem.h index ca42d5e46ccc..3be35680a54f 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -54,7 +54,7 @@ enum execmem_range_flags { EXECMEM_ROX_CACHE = (1 << 1), }; -#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) +#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX /** * execmem_fill_trapping_insns - set memory to contain instructions that * will trap @@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size_t size); * Return: 0 on success or negative error code on failure. */ int execmem_restore_rox(void *ptr, size_t size); - -/* - * Called from mark_readonly(), where the system transitions to ROX. - */ -void execmem_cache_make_ro(void); #else static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } -static inline void execmem_cache_make_ro(void) { } #endif /** diff --git a/mm/execmem.c b/mm/execmem.c index 9720ac2dfa41..2b683e7d864d 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -254,34 +254,6 @@ static void *__execmem_cache_alloc(struct execmem_range *range, size_t size) return ptr; } -static bool execmem_cache_rox = false; - -void execmem_cache_make_ro(void) -{ - struct maple_tree *free_areas = &execmem_cache.free_areas; - struct maple_tree *busy_areas = &execmem_cache.busy_areas; - MA_STATE(mas_free, free_areas, 0, ULONG_MAX); - MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX); - struct mutex *mutex = &execmem_cache.mutex; - void *area; - - execmem_cache_rox = true; - - mutex_lock(mutex); - - mas_for_each(&mas_free, area, ULONG_MAX) { - unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT; - set_memory_ro(mas_free.index, pages); - } - - mas_for_each(&mas_busy, area, ULONG_MAX) { - unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT; - set_memory_ro(mas_busy.index, pages); - } - - mutex_unlock(mutex); -} - static int execmem_cache_populate(struct execmem_range *range, size_t size) { unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; @@ -302,15 +274,9 @@ static int execmem_cache_populate(struct execmem_range *range, size_t size) /* fill memory with instructions that will trap */ execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); - if (execmem_cache_rox) { - err = set_memory_rox((unsigned long)p, vm->nr_pages); - if (err) - goto err_free_mem; - } else { - err = set_memory_x((unsigned long)p, vm->nr_pages); - if (err) - goto err_free_mem; - } + err = set_memory_rox((unsigned long)p, vm->nr_pages); + if (err) + goto err_free_mem; err = execmem_cache_add(p, alloc_size); if (err) From 570896604f47d44d4ff6882d2a588428d2a6ef17 Mon Sep 17 00:00:00 2001 From: Francesco Dolcini Date: Thu, 5 Jun 2025 15:03:02 +0200 Subject: [PATCH 152/497] Revert "wifi: mwifiex: Fix HT40 bandwidth issue." This reverts commit 4fcfcbe45734 ("wifi: mwifiex: Fix HT40 bandwidth issue.") That commit introduces a regression, when HT40 mode is enabled, received packets are lost, this was experience with W8997 with both SDIO-UART and SDIO-SDIO variants. From an initial investigation the issue solves on its own after some time, but it's not clear what is the reason. Given that this was just a performance optimization, let's revert it till we have a better understanding of the issue and a proper fix. Cc: Jeff Chen Cc: stable@vger.kernel.org Fixes: 4fcfcbe45734 ("wifi: mwifiex: Fix HT40 bandwidth issue.") Closes: https://lore.kernel.org/all/20250603203337.GA109929@francesco-nb/ Signed-off-by: Francesco Dolcini Link: https://patch.msgid.link/20250605130302.55555-1-francesco@dolcini.it [fix commit reference format] Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwifiex/11n.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index 738bafc3749b..66f0f5377ac1 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -403,14 +403,12 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, if (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 && bss_desc->bcn_ht_oper->ht_param & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) { - chan_list->chan_scan_param[0].radio_type |= - CHAN_BW_40MHZ << 2; + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY) SET_SECONDARYCHAN(chan_list->chan_scan_param[0]. radio_type, (bss_desc->bcn_ht_oper->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET)); - } + *buffer += struct_size(chan_list, chan_scan_param, 1); ret_len += struct_size(chan_list, chan_scan_param, 1); } From 6a0f81c549a00f87865ff0ad2400944ca0726868 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 3 Jun 2025 12:17:54 +0300 Subject: [PATCH 153/497] wifi: iwlwifi: fix merge damage related to iwl_pci_resume The changes I made in wifi: iwlwifi: don't warn if the NIC is gone in resume conflicted with a major rework done in this area. The merge de-facto removed my changes. Re-add them. Signed-off-by: Emmanuel Grumbach Link: https://patch.msgid.link/20250603091754.70182-1-emmanuel.grumbach@intel.com Fixes: 06c4b2036818 ("Merge tag 'iwlwifi-next-2025-05-15' of https://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-next") Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 656f8b06c27b..0a9e0dbb58fb 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1501,11 +1501,27 @@ static int _iwl_pci_resume(struct device *device, bool restore) * Scratch value was altered, this means the device was powered off, we * need to reset it completely. * Note: MAC (bits 0:7) will be cleared upon suspend even with wowlan, - * so assume that any bits there mean that the device is usable. + * but not bits [15:8]. So if we have bits set in lower word, assume + * the device is alive. + * For older devices, just try silently to grab the NIC. */ - if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ && - !iwl_read32(trans, CSR_FUNC_SCRATCH)) - device_was_powered_off = true; + if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_BZ) { + if (!(iwl_read32(trans, CSR_FUNC_SCRATCH) & + CSR_FUNC_SCRATCH_POWER_OFF_MASK)) + device_was_powered_off = true; + } else { + /* + * bh are re-enabled by iwl_trans_pcie_release_nic_access, + * so re-enable them if _iwl_trans_pcie_grab_nic_access fails. + */ + local_bh_disable(); + if (_iwl_trans_pcie_grab_nic_access(trans, true)) { + iwl_trans_pcie_release_nic_access(trans); + } else { + device_was_powered_off = true; + local_bh_enable(); + } + } if (restore || device_was_powered_off) { trans->state = IWL_TRANS_NO_FW; From f87586598fffac31afc1141471b789251b030a76 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Fri, 23 May 2025 11:01:56 +0000 Subject: [PATCH 154/497] wifi: cfg80211: use kfree_sensitive() for connkeys cleanup The nl80211_parse_connkeys() function currently uses kfree() to release the 'result' structure in error handling paths. However, if an error occurs due to result->def being less than 0, the 'result' structure may contain sensitive information. To prevent potential leakage of sensitive data, replace kfree() with kfree_sensitive() when freeing 'result'. This change aligns with the approach used in its caller, nl80211_join_ibss(), enhancing the overall security of the wireless subsystem. Signed-off-by: Zilin Guan Link: https://patch.msgid.link/20250523110156.4017111-1-zilin@seu.edu.cn Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fd5f79266471..85f139016da2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1583,7 +1583,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, return result; error: - kfree(result); + kfree_sensitive(result); return ERR_PTR(err); } From aa3f93cd75bf8195e434deefff853d70d723d6f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Jun 2025 10:38:30 +0200 Subject: [PATCH 155/497] dma-buf: fix compare in WARN_ON_ONCE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smatch pointed out this trivial typo: drivers/dma-buf/dma-buf.c:1123 dma_buf_map_attachment() warn: passing positive error code '16' to 'ERR_PTR' drivers/dma-buf/dma-buf.c 1113 dma_resv_assert_held(attach->dmabuf->resv); 1114 1115 if (dma_buf_pin_on_map(attach)) { 1116 ret = attach->dmabuf->ops->pin(attach); 1117 /* 1118 * Catch exporters making buffers inaccessible even when 1119 * attachments preventing that exist. 1120 */ 1121 WARN_ON_ONCE(ret == EBUSY); ^^^^^ This was probably intended to be -EBUSY? 1122 if (ret) --> 1123 return ERR_PTR(ret); ^^^ Otherwise we will eventually crash. 1124 } 1125 1126 sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); 1127 if (!sg_table) 1128 sg_table = ERR_PTR(-ENOMEM); 1129 if (IS_ERR(sg_table)) 1130 goto error_unpin; 1131 Signed-off-by: Christian König Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250605085336.62156-1-christian.koenig@amd.com --- drivers/dma-buf/dma-buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 890ecac04dac..2bcf9ceca997 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -1118,7 +1118,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach, * Catch exporters making buffers inaccessible even when * attachments preventing that exist. */ - WARN_ON_ONCE(ret == EBUSY); + WARN_ON_ONCE(ret == -EBUSY); if (ret) return ERR_PTR(ret); } From 6bdd3a01fe4627ad7a562ba38eb759eba715b671 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2025 16:00:20 +0200 Subject: [PATCH 156/497] fs: add missing values to TRACE_IOCB_STRINGS Make sure all values are covered. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/20250610140020.2227932-1-hch@lst.de Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..d27c402f1162 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,7 +399,9 @@ struct readahead_control; { IOCB_WAITQ, "WAITQ" }, \ { IOCB_NOIO, "NOIO" }, \ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \ - { IOCB_DIO_CALLER_COMP, "CALLER_COMP" } + { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }, \ + { IOCB_AIO_RW, "AIO_RW" }, \ + { IOCB_HAS_METADATA, "AIO_HAS_METADATA" } struct kiocb { struct file *ki_filp; From ad5a0351064c8f744416df3a49156d2c61af5bc0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 10 Jun 2025 11:04:04 +1000 Subject: [PATCH 157/497] VFS: change try_lookup_noperm() to skip revalidation The recent change from using d_hash_and_lookup() to using try_lookup_noperm() inadvertently introduce a d_revalidate() call when the lookup was successful. Steven French reports that this resulted in worse than halving of performance in some cases. Prior to the offending patch the only caller of try_lookup_noperm() was autofs which does not need the d_revalidate(). So it is safe to remove the d_revalidate() call providing we stop using try_lookup_noperm() to implement lookup_noperm(). The "try_" in the name is strongly suggestive that the caller isn't expecting much effort, so it seems reasonable to avoid the effort of d_revalidate(). Fixes: 06c567403ae5 ("Use try_lookup_noperm() instead of d_hash_and_lookup() outside of VFS") Reported-by: Steve French Link: https://lore.kernel.org/all/CAH2r5mu5SfBrdc2CFHwzft8=n9koPMk+Jzwpy-oUMx-wCRCesQ@mail.gmail.com/ Signed-off-by: NeilBrown Link: https://lore.kernel.org/174951744454.608730.18354002683881684261@noble.neil.brown.name Tested-by: Steve French Signed-off-by: Christian Brauner --- fs/namei.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 4bb889fc980b..f761cafaeaad 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2917,7 +2917,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, * @base: base directory to lookup from * * Look up a dentry by name in the dcache, returning NULL if it does not - * currently exist. The function does not try to create a dentry. + * currently exist. The function does not try to create a dentry and if one + * is found it doesn't try to revalidate it. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. @@ -2933,7 +2934,7 @@ struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base) if (err) return ERR_PTR(err); - return lookup_dcache(name, base, 0); + return d_lookup(base, name); } EXPORT_SYMBOL(try_lookup_noperm); @@ -3057,14 +3058,22 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked); * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. * - * Unlike lookup_noperm, it should be called without the parent + * Unlike lookup_noperm(), it should be called without the parent * i_rwsem held, and will take the i_rwsem itself if necessary. + * + * Unlike try_lookup_noperm() it *does* revalidate the dentry if it already + * existed. */ struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base) { struct dentry *ret; + int err; - ret = try_lookup_noperm(name, base); + err = lookup_noperm_common(name, base); + if (err) + return ERR_PTR(err); + + ret = lookup_dcache(name, base, 0); if (!ret) ret = lookup_slow(name, base, 0); return ret; From 3e5378779091c2f9a96d4404066e1923c92ceb8b Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Tue, 10 Jun 2025 15:54:14 +0200 Subject: [PATCH 158/497] ata: pata_macio: Fix PCI region leak pci_request_regions() became a managed devres functions if the PCI device was enabled with pcim_enable_device(), which is the case for pata_macio. The PCI subsystem recently removed this hybrid feature from pci_request_region(). When doing so, pata_macio was forgotten to be ported to use pcim_request_all_regions(). If that function is not used, pata_macio will fail on driver-reload because the PCI regions will remain blocked. Fix the region leak by replacing pci_request_regions() with its managed counterpart, pcim_request_all_regions(). Fixes: 51f6aec99cb0 ("PCI: Remove hybrid devres nature from request functions") Signed-off-by: Philipp Stanner Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20250610135413.35930-2-phasta@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/pata_macio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index fbf5f07ea357..f7a933eefe05 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -1298,7 +1298,7 @@ static int pata_macio_pci_attach(struct pci_dev *pdev, priv->dev = &pdev->dev; /* Get MMIO regions */ - if (pci_request_regions(pdev, "pata-macio")) { + if (pcim_request_all_regions(pdev, "pata-macio")) { dev_err(&pdev->dev, "Cannot obtain PCI resources\n"); return -EBUSY; From 6f29d393061c2c1fa30a72a0e9b01e15e09dae34 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 10 Jun 2025 13:07:59 +0200 Subject: [PATCH 159/497] ata: ahci: Use correct BIOS build date for ThinkPad W541 quirk Fix the TODO in ahci_broken_lpm() by using the proper BIOS build date. The proper BIOS build date was provided by Hans, see Link. Link: https://lore.kernel.org/linux-ide/6ea509c8-b38d-4941-8a29-c1117ff3dd5b@redhat.com/ Reviewed-by: Damien Le Moal Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250610110757.1318959-6-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 163ac909bd06..e7c8357cbc54 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1438,13 +1438,7 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"), }, - /* - * Note date based on release notes, 2.35 has been - * reported to be good, but I've been unable to get - * a hold of the reporter to get the DMI BIOS date. - * TODO: fix this. - */ - .driver_data = "20180310", /* 2.35 */ + .driver_data = "20180409", /* 2.35 */ }, { } /* terminate list */ }; From afe382843717d44b24ef5014d57dcbaab75a4052 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 7 May 2025 18:09:12 +0200 Subject: [PATCH 160/497] udmabuf: use sgtable-based scatterlist wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use common wrappers operating directly on the struct sg_table objects to fix incorrect use of scatterlists sync calls. dma_sync_sg_for_*() functions have to be called with the number of elements originally passed to dma_map_sg_*() function, not the one returned in sgtable's nents. Fixes: 1ffe09590121 ("udmabuf: fix dma-buf cpu access") CC: stable@vger.kernel.org Signed-off-by: Marek Szyprowski Acked-by: Vivek Kasireddy Reviewed-by: Christian König Signed-off-by: Christian König Link: https://lore.kernel.org/r/20250507160913.2084079-3-m.szyprowski@samsung.com --- drivers/dma-buf/udmabuf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c index 7eee3eb47a8e..c9d0c68d2fcb 100644 --- a/drivers/dma-buf/udmabuf.c +++ b/drivers/dma-buf/udmabuf.c @@ -264,8 +264,7 @@ static int begin_cpu_udmabuf(struct dma_buf *buf, ubuf->sg = NULL; } } else { - dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents, - direction); + dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction); } return ret; @@ -280,7 +279,7 @@ static int end_cpu_udmabuf(struct dma_buf *buf, if (!ubuf->sg) return -EINVAL; - dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction); + dma_sync_sgtable_for_device(dev, ubuf->sg, direction); return 0; } From 0e629694126ca388916f059453a1c36adde219c4 Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 9 Jun 2025 17:31:46 +0200 Subject: [PATCH 161/497] net/mdiobus: Fix potential out-of-bounds read/write access When using publicly available tools like 'mdio-tools' to read/write data from/to network interface and its PHY via mdiobus, there is no verification of parameters passed to the ioctl and it accepts any mdio address. Currently there is support for 32 addresses in kernel via PHY_MAX_ADDR define, but it is possible to pass higher value than that via ioctl. While read/write operation should generally fail in this case, mdiobus provides stats array, where wrong address may allow out-of-bounds read/write. Fix that by adding address verification before read/write operation. While this excludes this access from any statistics, it improves security of read/write operation. Fixes: 080bb352fad00 ("net: phy: Maintain MDIO device and bus statistics") Signed-off-by: Jakub Raczynski Reported-by: Wenjing Shan Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index a6bcb0fee863..60fd0cd7cb9c 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -445,6 +445,9 @@ int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read) retval = bus->read(bus, addr, regnum); else @@ -474,6 +477,9 @@ int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write) err = bus->write(bus, addr, regnum, val); else From 260388f79e94fb3026c419a208ece8358bb7b555 Mon Sep 17 00:00:00 2001 From: Jakub Raczynski Date: Mon, 9 Jun 2025 17:31:47 +0200 Subject: [PATCH 162/497] net/mdiobus: Fix potential out-of-bounds clause 45 read/write access When using publicly available tools like 'mdio-tools' to read/write data from/to network interface and its PHY via C45 (clause 45) mdiobus, there is no verification of parameters passed to the ioctl and it accepts any mdio address. Currently there is support for 32 addresses in kernel via PHY_MAX_ADDR define, but it is possible to pass higher value than that via ioctl. While read/write operation should generally fail in this case, mdiobus provides stats array, where wrong address may allow out-of-bounds read/write. Fix that by adding address verification before C45 read/write operation. While this excludes this access from any statistics, it improves security of read/write operation. Fixes: 4e4aafcddbbf ("net: mdio: Add dedicated C45 API to MDIO bus drivers") Signed-off-by: Jakub Raczynski Reported-by: Wenjing Shan Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 60fd0cd7cb9c..fda2e27c1810 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -541,6 +541,9 @@ int __mdiobus_c45_read(struct mii_bus *bus, int addr, int devad, u32 regnum) lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->read_c45) retval = bus->read_c45(bus, addr, devad, regnum); else @@ -572,6 +575,9 @@ int __mdiobus_c45_write(struct mii_bus *bus, int addr, int devad, u32 regnum, lockdep_assert_held_once(&bus->mdio_lock); + if (addr >= PHY_MAX_ADDR) + return -ENXIO; + if (bus->write_c45) err = bus->write_c45(bus, addr, devad, regnum, val); else From bc4394e5e79cdda1b0997e0be1d65e242f523f02 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 6 Jun 2025 12:25:46 -0700 Subject: [PATCH 163/497] perf: Fix the throttle error of some clock events Both ARM and IBM CI reports RCU stall, which can be reproduced by the below perf command. perf record -a -e cpu-clock -- sleep 2 The issue is introduced by the generic throttle patch set, which unconditionally invoke the event_stop() when throttle is triggered. The cpu-clock and task-clock are two special SW events, which rely on the hrtimer. The throttle is invoked in the hrtimer handler. The event_stop()->hrtimer_cancel() waits for the handler to finish, which is a deadlock. Instead of invoking the stop(), the HRTIMER_NORESTART should be used to stop the timer. There may be two ways to fix it: - Introduce a PMU flag to track the case. Avoid the event_stop in perf_event_throttle() if the flag is detected. It has been implemented in the https://lore.kernel.org/lkml/20250528175832.2999139-1-kan.liang@linux.intel.com/ The new flag was thought to be an overkill for the issue. - Add a check in the event_stop. Return immediately if the throttle is invoked in the hrtimer handler. Rely on the existing HRTIMER_NORESTART method to stop the timer. The latter is implemented here. Move event->hw.interrupts = MAX_INTERRUPTS before the stop(). It makes the order the same as perf_event_unthrottle(). Except the patch, no one checks the hw.interrupts in the stop(). There is no impact from the order change. When stops in the throttle, the event should not be updated, stop(event, 0). But the cpu_clock_event_stop() doesn't handle the flag. In logic, it's wrong. But it didn't bring any problems with the old code, because the stop() was not invoked when handling the throttle. Checking the flag before updating the event. Fixes: 9734e25fbf5a ("perf: Fix the throttle logic for a group") Closes: https://lore.kernel.org/lkml/20250527161656.GJ2566836@e132581.arm.com/ Closes: https://lore.kernel.org/lkml/djxlh5fx326gcenwrr52ry3pk4wxmugu4jccdjysza7tlc5fef@ktp4rffawgcw/ Closes: https://lore.kernel.org/lkml/8e8f51d8-af64-4d9e-934b-c0ee9f131293@linux.ibm.com/ Closes: https://lore.kernel.org/lkml/4ce106d0-950c-aadc-0b6a-f0215cd39913@maine.edu/ Reported-by: Leo Yan Reported-by: Aishwarya TCV Reported-by: Alexei Starovoitov Reported-by: Venkat Rao Bagalkote Reported-by: Vince Weaver Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ian Rogers Link: https://lkml.kernel.org/r/20250606192546.915765-1-kan.liang@linux.intel.com --- kernel/events/core.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index d7cf008f3d75..1f746469fda5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2674,8 +2674,8 @@ static void perf_event_unthrottle(struct perf_event *event, bool start) static void perf_event_throttle(struct perf_event *event) { - event->pmu->stop(event, 0); event->hw.interrupts = MAX_INTERRUPTS; + event->pmu->stop(event, 0); if (event == event->group_leader) perf_log_throttle(event, 0); } @@ -11774,7 +11774,12 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - if (is_sampling_event(event)) { + /* + * The throttle can be triggered in the hrtimer handler. + * The HRTIMER_NORESTART should be used to stop the timer, + * rather than hrtimer_cancel(). See perf_swevent_hrtimer() + */ + if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); @@ -11829,7 +11834,8 @@ static void cpu_clock_event_start(struct perf_event *event, int flags) static void cpu_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); - cpu_clock_event_update(event); + if (flags & PERF_EF_UPDATE) + cpu_clock_event_update(event); } static int cpu_clock_event_add(struct perf_event *event, int flags) @@ -11907,7 +11913,8 @@ static void task_clock_event_start(struct perf_event *event, int flags) static void task_clock_event_stop(struct perf_event *event, int flags) { perf_swevent_cancel_hrtimer(event); - task_clock_event_update(event, event->ctx->time); + if (flags & PERF_EF_UPDATE) + task_clock_event_update(event, event->ctx->time); } static int task_clock_event_add(struct perf_event *event, int flags) From 72f37957007d25f4290e3ba40d40aaec1dd0b0cf Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Tue, 10 Jun 2025 19:59:26 +0800 Subject: [PATCH 164/497] gpio: loongson-64bit: Correct Loongson-7A2000 ACPI GPIO access mode According to the description of the Loongson-7A2000 ACPI GPIO register in the manual, its access mode should be BIT_CTRL_MODE, otherwise there maybe some unpredictable behavior. Cc: stable@vger.kernel.org Fixes: 44fe79020b91 ("gpio: loongson-64bit: Add more gpio chip support") Signed-off-by: Binbin Zhou Reviewed-by: Huacai Chen Link: https://lore.kernel.org/r/20250610115926.347845-1-zhoubinbin@loongson.cn Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-loongson-64bit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-loongson-64bit.c b/drivers/gpio/gpio-loongson-64bit.c index 26227669f026..70a01c5b8ad1 100644 --- a/drivers/gpio/gpio-loongson-64bit.c +++ b/drivers/gpio/gpio-loongson-64bit.c @@ -268,7 +268,7 @@ static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data0 = { /* LS7A2000 ACPI GPIO */ static const struct loongson_gpio_chip_data loongson_gpio_ls7a2000_data1 = { .label = "ls7a2000_gpio", - .mode = BYTE_CTRL_MODE, + .mode = BIT_CTRL_MODE, .conf_offset = 0x4, .in_offset = 0x8, .out_offset = 0x0, From 4bb08cf974c54adc321b8505ce82720c2a15c451 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 11 Jun 2025 16:49:38 +0800 Subject: [PATCH 165/497] loop: move lo_set_size() out of queue freeze It isn't necessary to freeze queue for updating disk size given submit_bio() doesn't grab queue usage counter for checking eod. Also many driver won't freeze queue for calling set_capacity_and_notify(). Move lo_set_size() out of queue freeze for fixing many lockdep warning report. Link: https://lore.kernel.org/linux-block/67ea99e0.050a0220.3c3d88.0042.GAE@google.com/ Reported-by: syzbot+9dd7dbb1a4b915dee638@syzkaller.appspotmail.com Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250611084938.108829-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f8d136684109..500840e4a74e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1248,12 +1248,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS; lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); - if (size_changed) { - loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, - lo->lo_backing_file); - loop_set_size(lo, new_size); - } - /* update the direct I/O flag if lo_offset changed */ loop_update_dio(lo); @@ -1261,6 +1255,11 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) blk_mq_unfreeze_queue(lo->lo_queue, memflags); if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); + if (!err && size_changed) { + loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, + lo->lo_backing_file); + loop_set_size(lo, new_size); + } out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) From ff20c516485efbeb5c32bcb6aa5a24f73774185b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 11 Jun 2025 16:56:32 +0800 Subject: [PATCH 166/497] ublk: document auto buffer registration(UBLK_F_AUTO_BUF_REG) Document recently merged feature auto buffer registration(UBLK_F_AUTO_BUF_REG). Cc: Caleb Sander Mateos Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250611085632.109719-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- Documentation/block/ublk.rst | 75 ++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index c368e1081b41..abec524a04ed 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -352,6 +352,81 @@ For reaching best IO performance, ublk server should align its segment parameter of `struct ublk_param_segment` with backend for avoiding unnecessary IO split, which usually hurts io_uring performance. +Auto Buffer Registration +------------------------ + +The ``UBLK_F_AUTO_BUF_REG`` feature automatically handles buffer registration +and unregistration for I/O requests, which simplifies the buffer management +process and reduces overhead in the ublk server implementation. + +This is another feature flag for using zero copy, and it is compatible with +``UBLK_F_SUPPORT_ZERO_COPY``. + +Feature Overview +~~~~~~~~~~~~~~~~ + +This feature automatically registers request buffers to the io_uring context +before delivering I/O commands to the ublk server and unregisters them when +completing I/O commands. This eliminates the need for manual buffer +registration/unregistration via ``UBLK_IO_REGISTER_IO_BUF`` and +``UBLK_IO_UNREGISTER_IO_BUF`` commands, then IO handling in ublk server +can avoid dependency on the two uring_cmd operations. + +IOs can't be issued concurrently to io_uring if there is any dependency +among these IOs. So this way not only simplifies ublk server implementation, +but also makes concurrent IO handling becomes possible by removing the +dependency on buffer registration & unregistration commands. + +Usage Requirements +~~~~~~~~~~~~~~~~~~ + +1. The ublk server must create a sparse buffer table on the same ``io_ring_ctx`` + used for ``UBLK_IO_FETCH_REQ`` and ``UBLK_IO_COMMIT_AND_FETCH_REQ``. If + uring_cmd is issued on a different ``io_ring_ctx``, manual buffer + unregistration is required. + +2. Buffer registration data must be passed via uring_cmd's ``sqe->addr`` with the + following structure:: + + struct ublk_auto_buf_reg { + __u16 index; /* Buffer index for registration */ + __u8 flags; /* Registration flags */ + __u8 reserved0; /* Reserved for future use */ + __u32 reserved1; /* Reserved for future use */ + }; + + ublk_auto_buf_reg_to_sqe_addr() is for converting the above structure into + ``sqe->addr``. + +3. All reserved fields in ``ublk_auto_buf_reg`` must be zeroed. + +4. Optional flags can be passed via ``ublk_auto_buf_reg.flags``. + +Fallback Behavior +~~~~~~~~~~~~~~~~~ + +If auto buffer registration fails: + +1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled: + - The uring_cmd is completed + - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags`` + - The ublk server must manually deal with the failure, such as, register + the buffer manually, or using user copy feature for retrieving the data + for handling ublk IO + +2. If fallback is not enabled: + - The ublk I/O request fails silently + - The uring_cmd won't be completed + +Limitations +~~~~~~~~~~~ + +- Requires same ``io_ring_ctx`` for all operations +- May require manual buffer management in fallback cases +- io_ring_ctx buffer table has a max size of 16K, which may not be enough + in case that too many ublk devices are handled by this single io_ring_ctx + and each one has very large queue depth + References ========== From f705d33c2f0353039d03e5d6f18f70467d86080e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Jun 2025 09:59:15 +0900 Subject: [PATCH 167/497] block: Clear BIO_EMULATES_ZONE_APPEND flag on BIO completion When blk_zone_write_plug_bio_endio() is called for a regular write BIO used to emulate a zone append operation, that is, a BIO flagged with BIO_EMULATES_ZONE_APPEND, the BIO operation code is restored to the original REQ_OP_ZONE_APPEND but the BIO_EMULATES_ZONE_APPEND flag is not cleared. Clear it to fully return the BIO to its orginal definition. Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20250611005915.89843-1-dlemoal@kernel.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 8f15d1aa6eb8..cee7a0774a9b 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1225,6 +1225,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio) if (bio_flagged(bio, BIO_EMULATES_ZONE_APPEND)) { bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= REQ_OP_ZONE_APPEND; + bio_clear_flag(bio, BIO_EMULATES_ZONE_APPEND); } /* From cf625013d8741c01407bbb4a60c111b61b9fa69d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2025 06:44:16 +0200 Subject: [PATCH 168/497] block: don't use submit_bio_noacct_nocheck in blk_zone_wplug_bio_work Bios queued up in the zone write plug have already gone through all all preparation in the submit_bio path, including the freeze protection. Submitting them through submit_bio_noacct_nocheck duplicates the work and can can cause deadlocks when freezing a queue with pending bio write plugs. Go straight to ->submit_bio or blk_mq_submit_bio to bypass the superfluous extra freeze protection and checks. Fixes: 9b1ce7f0c6f8 ("block: Implement zone append emulation") Reported-by: Bart Van Assche Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20250611044416.2351850-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-zoned.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index cee7a0774a9b..351d659280e1 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -1307,7 +1307,6 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) spin_unlock_irqrestore(&zwplug->lock, flags); bdev = bio->bi_bdev; - submit_bio_noacct_nocheck(bio); /* * blk-mq devices will reuse the extra reference on the request queue @@ -1315,8 +1314,12 @@ static void blk_zone_wplug_bio_work(struct work_struct *work) * path for BIO-based devices will not do that. So drop this extra * reference here. */ - if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) + if (bdev_test_flag(bdev, BD_HAS_SUBMIT_BIO)) { + bdev->bd_disk->fops->submit_bio(bio); blk_queue_exit(bdev->bd_disk->queue); + } else { + blk_mq_submit_bio(bio); + } put_zwplug: /* Drop the reference we took in disk_zone_wplug_schedule_bio_work(). */ From 961296e89dc3800e6a3abc3f5d5bb4192cf31e98 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Jun 2025 08:48:46 -0600 Subject: [PATCH 169/497] block: use plug request list tail for one-shot backmerge attempt Previously, the block layer stored the requests in the plug list in LIFO order. For this reason, blk_attempt_plug_merge() would check just the head entry for a back merge attempt, and abort after that unless requests for multiple queues existed in the plug list. If more than one request is present in the plug list, this makes the one-shot back merging less useful than before, as it'll always fail to find a quick merge candidate. Use the tail entry for the one-shot merge attempt, which is the last added request in the list. If that fails, abort immediately unless there are multiple queues available. If multiple queues are available, then scan the list. Ideally the latter scan would be a backwards scan of the list, but as it currently stands, the plug list is singly linked and hence this isn't easily feasible. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-block/20250611121626.7252-1-abuehaze@amazon.com/ Reported-by: Hazem Mohamed Abuelfotoh Fixes: e70c301faece ("block: don't reorder requests in blk_add_rq_to_plug") Signed-off-by: Jens Axboe --- block/blk-merge.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 3af1d284add5..70d704615be5 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -998,20 +998,20 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, if (!plug || rq_list_empty(&plug->mq_list)) return false; - rq_list_for_each(&plug->mq_list, rq) { - if (rq->q == q) { - if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == - BIO_MERGE_OK) - return true; - break; - } + rq = plug->mq_list.tail; + if (rq->q == q) + return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK; + else if (!plug->multiple_queues) + return false; - /* - * Only keep iterating plug list for merges if we have multiple - * queues - */ - if (!plug->multiple_queues) - break; + rq_list_for_each(&plug->mq_list, rq) { + if (rq->q != q) + continue; + if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == + BIO_MERGE_OK) + return true; + break; } return false; } From 69a14d146f3b87819f3fb73ed5d1de3e1fa680c1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 2 Jun 2025 13:00:27 +0200 Subject: [PATCH 170/497] futex: Verify under the lock if hash can be replaced Once the global hash is requested there is no way back to switch back to the per-task private hash. This is checked at the begin of the function. It is possible that two threads simultaneously request the global hash and both pass the initial check and block later on the mm::futex_hash_lock. In this case the first thread performs the switch to the global hash. The second thread will also attempt to switch to the global hash and while doing so, accessing the nonexisting slot 1 of the struct futex_private_hash. The same applies if the hash is made immutable: There is no reference counting and the hash must not be replaced. Verify under mm_struct::futex_phash that neither the global hash nor an immutable hash in use. Tested-by: "Lai, Yi" Reported-by: "Lai, Yi" Closes: https://lore.kernel.org/all/aDwDw9Aygqo6oAx+@ly-workstation/ Fixes: bd54df5ea7cad ("futex: Allow to resize the private local hash") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250610104400.1077266-5-bigeasy@linutronix.de/ --- kernel/futex/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index b652d2f60c40..90d53fb0ee9e 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1629,6 +1629,16 @@ static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags) mm->futex_phash_new = NULL; if (fph) { + if (cur && (!cur->hash_mask || cur->immutable)) { + /* + * If two threads simultaneously request the global + * hash then the first one performs the switch, + * the second one returns here. + */ + free = fph; + mm->futex_phash_new = new; + return -EBUSY; + } if (cur && !new) { /* * If we have an existing hash, but do not yet have From 83f066fac3c231e58e9adf3b307e96fee172dfb3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Jun 2025 16:09:45 +0300 Subject: [PATCH 171/497] spi: stm32-ospi: clean up on error in probe() If reset_control_acquire() fails, then we can't return directly. We need to do a little clean up first. Fixes: cf2c3eceb757 ("spi: stm32-ospi: Make usage of reset_control_acquire/release() API") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aEmAGTUzzKZlLe3K@stanley.mountain Signed-off-by: Mark Brown --- drivers/spi/spi-stm32-ospi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-stm32-ospi.c b/drivers/spi/spi-stm32-ospi.c index db6b1cfc970f..4ab7e86f4bd5 100644 --- a/drivers/spi/spi-stm32-ospi.c +++ b/drivers/spi/spi-stm32-ospi.c @@ -937,8 +937,10 @@ static int stm32_ospi_probe(struct platform_device *pdev) goto err_pm_enable; ret = reset_control_acquire(ospi->rstc); - if (ret) - return dev_err_probe(dev, ret, "Can not acquire reset %d\n", ret); + if (ret) { + dev_err_probe(dev, ret, "Can not acquire reset %d\n", ret); + goto err_pm_resume; + } reset_control_assert(ospi->rstc); udelay(2); From 179a8427fcbffe36ccfed5e138d7c9b6180caff9 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 12 May 2025 22:16:34 +0000 Subject: [PATCH 172/497] KVM: SEV: Disable SEV-SNP support on initialization failure During platform init, SNP initialization may fail for several reasons, such as firmware command failures and incompatible versions. However, the KVM capability may continue to advertise support for it. The platform may have SNP enabled but if SNP_INIT fails then SNP is not supported by KVM. During KVM module initialization query the SNP platform status to obtain the SNP initialization state and use it as an additional condition to determine support for SEV-SNP. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Co-developed-by: Pratik R. Sampat Signed-off-by: Pratik R. Sampat Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Reviewed-by: Pankaj Gupta Reviewed-by: Pavan Kumar Paluri Message-ID: <20250512221634.12045-1-Ashish.Kalra@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 44 +++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5a69b657dae9..459c3b791fd4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2871,6 +2871,33 @@ void __init sev_set_cpu_caps(void) } } +static bool is_sev_snp_initialized(void) +{ + struct sev_user_data_snp_status *status; + struct sev_data_snp_addr buf; + bool initialized = false; + int ret, error = 0; + + status = snp_alloc_firmware_page(GFP_KERNEL | __GFP_ZERO); + if (!status) + return false; + + buf.address = __psp_pa(status); + ret = sev_do_cmd(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &error); + if (ret) { + pr_err("SEV: SNP_PLATFORM_STATUS failed ret=%d, fw_error=%d (%#x)\n", + ret, error, error); + goto out; + } + + initialized = !!status->state; + +out: + snp_free_firmware_page(status); + + return initialized; +} + void __init sev_hardware_setup(void) { unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count; @@ -2975,6 +3002,14 @@ void __init sev_hardware_setup(void) sev_snp_supported = sev_snp_enabled && cc_platform_has(CC_ATTR_HOST_SEV_SNP); out: + if (sev_enabled) { + init_args.probe = true; + if (sev_platform_init(&init_args)) + sev_supported = sev_es_supported = sev_snp_supported = false; + else if (sev_snp_supported) + sev_snp_supported = is_sev_snp_initialized(); + } + if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : @@ -3001,15 +3036,6 @@ void __init sev_hardware_setup(void) sev_supported_vmsa_features = 0; if (sev_es_debug_swap_enabled) sev_supported_vmsa_features |= SVM_SEV_FEAT_DEBUG_SWAP; - - if (!sev_enabled) - return; - - /* - * Do both SNP and SEV initialization at KVM module load. - */ - init_args.probe = true; - sev_platform_init(&init_args); } void sev_hardware_unsetup(void) From 488ef3560196ee10fc1c5547e1574a87068c3494 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 28 May 2025 13:18:17 +0100 Subject: [PATCH 173/497] KEYS: Invert FINAL_PUT bit Invert the FINAL_PUT bit so that test_bit_acquire and clear_bit_unlock can be used instead of smp_mb. Signed-off-by: Herbert Xu Signed-off-by: David Howells Reviewed-by: Jarkko Sakkinen Signed-off-by: Linus Torvalds --- include/linux/key.h | 2 +- security/keys/gc.c | 4 ++-- security/keys/key.c | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index ba05de8579ec..81b8f05c6898 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,7 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ -#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */ +#define KEY_FLAG_USER_ALIVE 10 /* set if final put has not happened on key yet */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/security/keys/gc.c b/security/keys/gc.c index f27223ea4578..748e83818a76 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -218,8 +218,8 @@ static void key_garbage_collector(struct work_struct *work) key = rb_entry(cursor, struct key, serial_node); cursor = rb_next(cursor); - if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) { - smp_mb(); /* Clobber key->user after FINAL_PUT seen. */ + if (!test_bit_acquire(KEY_FLAG_USER_ALIVE, &key->flags)) { + /* Clobber key->user after final put seen. */ goto found_unreferenced_key; } diff --git a/security/keys/key.c b/security/keys/key.c index 7198cd2ac3a3..3bbdde778631 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -298,6 +298,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->restrict_link = restrict_link; key->last_used_at = ktime_get_real_seconds(); + key->flags |= 1 << KEY_FLAG_USER_ALIVE; if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; if (flags & KEY_ALLOC_BUILT_IN) @@ -658,8 +659,8 @@ void key_put(struct key *key) key->user->qnbytes -= key->quotalen; spin_unlock_irqrestore(&key->user->lock, flags); } - smp_mb(); /* key->user before FINAL_PUT set. */ - set_bit(KEY_FLAG_FINAL_PUT, &key->flags); + /* Mark key as safe for GC after key->user done. */ + clear_bit_unlock(KEY_FLAG_USER_ALIVE, &key->flags); schedule_work(&key_gc_work); } } From 62a65b32bddb0f242b106b8c464913f2f01c108d Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Fri, 6 Jun 2025 11:11:17 -0700 Subject: [PATCH 174/497] dt-bindings: pmem: Convert binding to YAML Convert the PMEM device tree binding from text to YAML. This will allow device trees with pmem-region nodes to pass dtbs_check. [iweiny: fix ups from Rob/Drew] Acked-by: Conor Dooley Acked-by: Oliver O'Halloran Signed-off-by: Drew Fustini Acked-by: Dan Williams Link: https://patch.msgid.link/20250606184405.359812-4-drew@pdp7.com Signed-off-by: Ira Weiny --- .../devicetree/bindings/pmem/pmem-region.txt | 65 ------------------- .../devicetree/bindings/pmem/pmem-region.yaml | 48 ++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 49 insertions(+), 66 deletions(-) delete mode 100644 Documentation/devicetree/bindings/pmem/pmem-region.txt create mode 100644 Documentation/devicetree/bindings/pmem/pmem-region.yaml diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt deleted file mode 100644 index cd79975e85ec..000000000000 --- a/Documentation/devicetree/bindings/pmem/pmem-region.txt +++ /dev/null @@ -1,65 +0,0 @@ -Device-tree bindings for persistent memory regions ------------------------------------------------------ - -Persistent memory refers to a class of memory devices that are: - - a) Usable as main system memory (i.e. cacheable), and - b) Retain their contents across power failure. - -Given b) it is best to think of persistent memory as a kind of memory mapped -storage device. To ensure data integrity the operating system needs to manage -persistent regions separately to the normal memory pool. To aid with that this -binding provides a standardised interface for discovering where persistent -memory regions exist inside the physical address space. - -Bindings for the region nodes: ------------------------------ - -Required properties: - - compatible = "pmem-region" - - - reg = ; - The reg property should specify an address range that is - translatable to a system physical address range. This address - range should be mappable as normal system memory would be - (i.e cacheable). - - If the reg property contains multiple address ranges - each address range will be treated as though it was specified - in a separate device node. Having multiple address ranges in a - node implies no special relationship between the two ranges. - -Optional properties: - - Any relevant NUMA associativity properties for the target platform. - - - volatile; This property indicates that this region is actually - backed by non-persistent memory. This lets the OS know that it - may skip the cache flushes required to ensure data is made - persistent after a write. - - If this property is absent then the OS must assume that the region - is backed by non-volatile memory. - -Examples: --------------------- - - /* - * This node specifies one 4KB region spanning from - * 0x5000 to 0x5fff that is backed by non-volatile memory. - */ - pmem@5000 { - compatible = "pmem-region"; - reg = <0x00005000 0x00001000>; - }; - - /* - * This node specifies two 4KB regions that are backed by - * volatile (normal) memory. - */ - pmem@6000 { - compatible = "pmem-region"; - reg = < 0x00006000 0x00001000 - 0x00008000 0x00001000 >; - volatile; - }; - diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.yaml b/Documentation/devicetree/bindings/pmem/pmem-region.yaml new file mode 100644 index 000000000000..bd0f0c793f03 --- /dev/null +++ b/Documentation/devicetree/bindings/pmem/pmem-region.yaml @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pmem-region.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +maintainers: + - Oliver O'Halloran + +title: Persistent Memory Regions + +description: | + Persistent memory refers to a class of memory devices that are: + + a) Usable as main system memory (i.e. cacheable), and + b) Retain their contents across power failure. + + Given b) it is best to think of persistent memory as a kind of memory mapped + storage device. To ensure data integrity the operating system needs to manage + persistent regions separately to the normal memory pool. To aid with that this + binding provides a standardised interface for discovering where persistent + memory regions exist inside the physical address space. + +properties: + compatible: + const: pmem-region + + reg: + maxItems: 1 + + volatile: + description: + Indicates the region is volatile (non-persistent) and the OS can skip + cache flushes for writes + type: boolean + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + pmem@5000 { + compatible = "pmem-region"; + reg = <0x00005000 0x00001000>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..73ded8134ffd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13798,7 +13798,7 @@ M: Oliver O'Halloran L: nvdimm@lists.linux.dev S: Supported Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ -F: Documentation/devicetree/bindings/pmem/pmem-region.txt +F: Documentation/devicetree/bindings/pmem/pmem-region.yaml F: drivers/nvdimm/of_pmem.c LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM From 20a2aa01f5aeb6daad9aeaa7c33dd512c58d81eb Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Jun 2025 11:14:25 -0400 Subject: [PATCH 175/497] Bluetooth: Fix NULL pointer deference on eir_get_service_data The len parameter is considered optional so it can be NULL so it cannot be used for skipping to next entry of EIR_SERVICE_DATA. Fixes: 8f9ae5b3ae80 ("Bluetooth: eir: Add helpers for managing service data") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/eir.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 1bc51e2b05a3..3e1713673ecc 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -366,17 +366,19 @@ u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr) void *eir_get_service_data(u8 *eir, size_t eir_len, u16 uuid, size_t *len) { - while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, len))) { + size_t dlen; + + while ((eir = eir_get_data(eir, eir_len, EIR_SERVICE_DATA, &dlen))) { u16 value = get_unaligned_le16(eir); if (uuid == value) { if (len) - *len -= 2; + *len = dlen - 2; return &eir[2]; } - eir += *len; - eir_len -= *len; + eir += dlen; + eir_len -= dlen; } return NULL; From 5725bc608252050ed8a4d47d59225b7dd73474c8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 5 Jun 2025 11:15:16 -0400 Subject: [PATCH 176/497] Bluetooth: hci_sync: Fix broadcast/PA when using an existing instance When using and existing adv_info instance for broadcast source it needs to be updated to periodic first before it can be reused, also in case the existing instance already have data hci_set_adv_instance_data cannot be used directly since it would overwrite the existing data so this reappend the original data after the Broadcast ID, if one was generated. Example: bluetoothctl># Add PBP to EA so it can be later referenced as the BIS ID bluetoothctl> advertise.service 0x1856 0x00 0x00 bluetoothctl> advertise on ... < HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 13 Handle: 0x01 Operation: Complete extended advertising data (0x03) Fragment preference: Minimize fragmentation (0x01) Data length: 0x09 Service Data: Public Broadcast Announcement (0x1856) Data[2]: 0000 Flags: 0x06 LE General Discoverable Mode BR/EDR Not Supported ... bluetoothctl># Attempt to acquire Broadcast Source transport bluetoothctl>transport.acquire /org/bluez/hci0/pac_bcast0/fd0 ... < HCI Command: LE Set Extended Advertising Data (0x08|0x0037) plen 255 Handle: 0x01 Operation: Complete extended advertising data (0x03) Fragment preference: Minimize fragmentation (0x01) Data length: 0x0e Service Data: Broadcast Audio Announcement (0x1852) Broadcast ID: 11371620 (0xad8464) Service Data: Public Broadcast Announcement (0x1856) Data[2]: 0000 Flags: 0x06 LE General Discoverable Mode BR/EDR Not Supported Link: https://github.com/bluez/bluez/issues/1117 Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 62d1ff951ebe..8ba1c3aa7801 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1559,7 +1559,8 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) { u8 bid[3]; - u8 ad[4 + 3]; + u8 ad[HCI_MAX_EXT_AD_LENGTH]; + u8 len; /* Skip if NULL adv as instance 0x00 is used for general purpose * advertising so it cannot used for the likes of Broadcast Announcement @@ -1585,8 +1586,10 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) /* Generate Broadcast ID */ get_random_bytes(bid, sizeof(bid)); - eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); - hci_set_adv_instance_data(hdev, adv->instance, sizeof(ad), ad, 0, NULL); + len = eir_append_service_data(ad, 0, 0x1852, bid, sizeof(bid)); + memcpy(ad + len, adv->adv_data, adv->adv_data_len); + hci_set_adv_instance_data(hdev, adv->instance, len + adv->adv_data_len, + ad, 0, NULL); return hci_update_adv_data_sync(hdev, adv->instance); } @@ -1603,8 +1606,15 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, if (instance) { adv = hci_find_adv_instance(hdev, instance); - /* Create an instance if that could not be found */ - if (!adv) { + if (adv) { + /* Turn it into periodic advertising */ + adv->periodic = true; + adv->per_adv_data_len = data_len; + if (data) + memcpy(adv->per_adv_data, data, data_len); + adv->flags = flags; + } else if (!adv) { + /* Create an instance if that could not be found */ adv = hci_add_per_instance(hdev, instance, flags, data_len, data, sync_interval, From 47c03902269aff377f959dc3fd94a9733aa31d6e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 10 Jun 2025 10:14:35 -0400 Subject: [PATCH 177/497] Bluetooth: eir: Fix possible crashes on eir_create_adv_data eir_create_adv_data may attempt to add EIR_FLAGS and EIR_TX_POWER without checking if that would fit. Link: https://github.com/bluez/bluez/issues/1117#issuecomment-2958244066 Fixes: 01ce70b0a274 ("Bluetooth: eir: Move EIR/Adv Data functions to its own file") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/eir.c | 7 ++++--- net/bluetooth/eir.h | 2 +- net/bluetooth/hci_sync.c | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bluetooth/eir.c b/net/bluetooth/eir.c index 3e1713673ecc..3f72111ba651 100644 --- a/net/bluetooth/eir.c +++ b/net/bluetooth/eir.c @@ -242,7 +242,7 @@ u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) return ad_len; } -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size) { struct adv_info *adv = NULL; u8 ad_len = 0, flags = 0; @@ -286,7 +286,7 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) /* If flags would still be empty, then there is no need to * include the "Flags" AD field". */ - if (flags) { + if (flags && (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_FLAGS; ptr[2] = flags; @@ -316,7 +316,8 @@ u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) } /* Provide Tx Power only if we can provide a valid value for it */ - if (adv_tx_power != HCI_TX_POWER_INVALID) { + if (adv_tx_power != HCI_TX_POWER_INVALID && + (ad_len + eir_precalc_len(1) <= size)) { ptr[0] = 0x02; ptr[1] = EIR_TX_POWER; ptr[2] = (u8)adv_tx_power; diff --git a/net/bluetooth/eir.h b/net/bluetooth/eir.h index 5c89a05e8b29..9372db83f912 100644 --- a/net/bluetooth/eir.h +++ b/net/bluetooth/eir.h @@ -9,7 +9,7 @@ void eir_create(struct hci_dev *hdev, u8 *data); -u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); +u8 eir_create_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr, u8 size); u8 eir_create_scan_rsp(struct hci_dev *hdev, u8 instance, u8 *ptr); u8 eir_create_per_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 8ba1c3aa7801..83de3847c8ea 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1822,7 +1822,8 @@ static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) return 0; } - len = eir_create_adv_data(hdev, instance, pdu->data); + len = eir_create_adv_data(hdev, instance, pdu->data, + HCI_MAX_EXT_AD_LENGTH); pdu->length = len; pdu->handle = adv ? adv->handle : instance; @@ -1853,7 +1854,7 @@ static int hci_set_adv_data_sync(struct hci_dev *hdev, u8 instance) memset(&cp, 0, sizeof(cp)); - len = eir_create_adv_data(hdev, instance, cp.data); + len = eir_create_adv_data(hdev, instance, cp.data, sizeof(cp.data)); /* There's nothing to do if the data hasn't changed */ if (hdev->adv_data_len == len && From 2df108c227b266a9ec9e3fda3828d2ac9662aa33 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 9 Jun 2025 14:53:53 -0400 Subject: [PATCH 178/497] Bluetooth: ISO: Fix using BT_SK_PA_SYNC to detect BIS sockets BT_SK_PA_SYNC is only valid for Broadcast Sinks which means socket used for Broadcast Sources wouldn't be able to use the likes of getpeername to read out the sockaddr_iso_bc fields which may have been update (e.g. bc_sid). Fixes: 0a766a0affb5 ("Bluetooth: ISO: Fix getpeername not returning sockaddr_iso_bc fields") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 6e2c752aaa8f..affa2077e3a2 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1337,10 +1337,13 @@ static int iso_sock_getname(struct socket *sock, struct sockaddr *addr, addr->sa_family = AF_BLUETOOTH; if (peer) { + struct hci_conn *hcon = iso_pi(sk)->conn ? + iso_pi(sk)->conn->hcon : NULL; + bacpy(&sa->iso_bdaddr, &iso_pi(sk)->dst); sa->iso_bdaddr_type = iso_pi(sk)->dst_type; - if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) { + if (hcon && hcon->type == BIS_LINK) { sa->iso_bc->bc_sid = iso_pi(sk)->bc_sid; sa->iso_bc->bc_num_bis = iso_pi(sk)->bc_num_bis; memcpy(sa->iso_bc->bc_bis, iso_pi(sk)->bc_bis, From 5842c01a9ed1d515c8ba2d6d3733eac78ace89c1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 6 Jun 2025 14:32:49 -0400 Subject: [PATCH 179/497] Bluetooth: ISO: Fix not using bc_sid as advertisement SID Currently bc_sid is being ignore when acting as Broadcast Source role, so this fix it by passing the bc_sid and then use it when programming the PA: < HCI Command: LE Set Exte.. (0x08|0x0036) plen 25 Handle: 0x01 Properties: 0x0000 Min advertising interval: 140.000 msec (0x00e0) Max advertising interval: 140.000 msec (0x00e0) Channel map: 37, 38, 39 (0x07) Own address type: Random (0x01) Peer address type: Public (0x00) Peer address: 00:00:00:00:00:00 (OUI 00-00-00) Filter policy: Allow Scan Request from Any, Allow Connect Request from Any (0x00) TX power: Host has no preference (0x7f) Primary PHY: LE 1M (0x01) Secondary max skip: 0x00 Secondary PHY: LE 2M (0x02) SID: 0x01 Scan request notifications: Disabled (0x00) Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 9 ++++++--- include/net/bluetooth/hci_sync.h | 4 ++-- net/bluetooth/hci_conn.c | 31 ++++++++++++++++++++++++------- net/bluetooth/hci_core.c | 16 +++++++++++++++- net/bluetooth/hci_sync.c | 20 +++++++++++++++++--- net/bluetooth/iso.c | 12 ++++++++---- 6 files changed, 72 insertions(+), 20 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f7b1a9eb9543..a760f05fa3fb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -242,6 +242,7 @@ struct adv_info { __u8 mesh; __u8 instance; __u8 handle; + __u8 sid; __u32 flags; __u16 timeout; __u16 remaining_time; @@ -1551,13 +1552,14 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst, u16 timeout); struct hci_conn *hci_bind_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); -struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, +struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base); struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, struct bt_iso_qos *qos); struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos, __u8 data_len, __u8 *data); struct hci_conn *hci_pa_create_sync(struct hci_dev *hdev, bdaddr_t *dst, __u8 dst_type, __u8 sid, struct bt_iso_qos *qos); @@ -1832,6 +1834,7 @@ int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, void hci_adv_instances_clear(struct hci_dev *hdev); struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance); +struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid); struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance); struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, @@ -1839,7 +1842,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u16 timeout, u16 duration, s8 tx_power, u32 min_interval, u32 max_interval, u8 mesh_handle); -struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval); int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 72558c826aa1..5224f57f6af2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -115,8 +115,8 @@ int hci_enable_ext_advertising_sync(struct hci_dev *hdev, u8 instance); int hci_enable_advertising_sync(struct hci_dev *hdev); int hci_enable_advertising(struct hci_dev *hdev); -int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, - u8 *data, u32 flags, u16 min_interval, +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid, + u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval); int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 99efeed6a766..4f379184df5b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1501,8 +1501,8 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos) /* This function requires the caller holds hdev->lock */ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, - struct bt_iso_qos *qos, __u8 base_len, - __u8 *base) + __u8 sid, struct bt_iso_qos *qos, + __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; @@ -1543,6 +1543,7 @@ static struct hci_conn *hci_add_bis(struct hci_dev *hdev, bdaddr_t *dst, return conn; conn->state = BT_CONNECT; + conn->sid = sid; hci_conn_hold(conn); return conn; @@ -2062,7 +2063,8 @@ static int create_big_sync(struct hci_dev *hdev, void *data) if (qos->bcast.bis) sync_interval = interval * 4; - err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->le_per_adv_data_len, + err = hci_start_per_adv_sync(hdev, qos->bcast.bis, conn->sid, + conn->le_per_adv_data_len, conn->le_per_adv_data, flags, interval, interval, sync_interval); if (err) @@ -2134,7 +2136,7 @@ static void create_big_complete(struct hci_dev *hdev, void *data, int err) } } -struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, +struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, __u8 sid, struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { @@ -2156,7 +2158,7 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst, base, base_len); /* We need hci_conn object using the BDADDR_ANY as dst */ - conn = hci_add_bis(hdev, dst, qos, base_len, eir); + conn = hci_add_bis(hdev, dst, sid, qos, base_len, eir); if (IS_ERR(conn)) return conn; @@ -2207,20 +2209,35 @@ static void bis_mark_per_adv(struct hci_conn *conn, void *data) } struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst, - __u8 dst_type, struct bt_iso_qos *qos, + __u8 dst_type, __u8 sid, + struct bt_iso_qos *qos, __u8 base_len, __u8 *base) { struct hci_conn *conn; int err; struct iso_list_data data; - conn = hci_bind_bis(hdev, dst, qos, base_len, base); + conn = hci_bind_bis(hdev, dst, sid, qos, base_len, base); if (IS_ERR(conn)) return conn; if (conn->state == BT_CONNECTED) return conn; + /* Check if SID needs to be allocated then search for the first + * available. + */ + if (conn->sid == HCI_SID_INVALID) { + u8 sid; + + for (sid = 0; sid <= 0x0f; sid++) { + if (!hci_find_adv_sid(hdev, sid)) { + conn->sid = sid; + break; + } + } + } + data.big = qos->bcast.big; data.bis = qos->bcast.bis; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 487c045a7ba8..07a8b4281a39 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1584,6 +1584,19 @@ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) return NULL; } +/* This function requires the caller holds hdev->lock */ +struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid) +{ + struct adv_info *adv; + + list_for_each_entry(adv, &hdev->adv_instances, list) { + if (adv->sid == sid) + return adv; + } + + return NULL; +} + /* This function requires the caller holds hdev->lock */ struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { @@ -1736,7 +1749,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, } /* This function requires the caller holds hdev->lock */ -struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, +struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval) { @@ -1748,6 +1761,7 @@ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, if (IS_ERR(adv)) return adv; + adv->sid = sid; adv->periodic = true; adv->per_adv_data_len = data_len; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 83de3847c8ea..6687f2a4d1eb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1261,10 +1261,12 @@ int hci_setup_ext_adv_instance_sync(struct hci_dev *hdev, u8 instance) hci_cpu_to_le24(adv->min_interval, cp.min_interval); hci_cpu_to_le24(adv->max_interval, cp.max_interval); cp.tx_power = adv->tx_power; + cp.sid = adv->sid; } else { hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; + cp.sid = 0x00; } secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); @@ -1594,8 +1596,8 @@ static int hci_adv_bcast_annoucement(struct hci_dev *hdev, struct adv_info *adv) return hci_update_adv_data_sync(hdev, adv->instance); } -int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, - u8 *data, u32 flags, u16 min_interval, +int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 sid, + u8 data_len, u8 *data, u32 flags, u16 min_interval, u16 max_interval, u16 sync_interval) { struct adv_info *adv = NULL; @@ -1607,6 +1609,18 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, if (instance) { adv = hci_find_adv_instance(hdev, instance); if (adv) { + if (sid != HCI_SID_INVALID && adv->sid != sid) { + /* If the SID don't match attempt to find by + * SID. + */ + adv = hci_find_adv_sid(hdev, sid); + if (!adv) { + bt_dev_err(hdev, + "Unable to find adv_info"); + return -EINVAL; + } + } + /* Turn it into periodic advertising */ adv->periodic = true; adv->per_adv_data_len = data_len; @@ -1615,7 +1629,7 @@ int hci_start_per_adv_sync(struct hci_dev *hdev, u8 instance, u8 data_len, adv->flags = flags; } else if (!adv) { /* Create an instance if that could not be found */ - adv = hci_add_per_instance(hdev, instance, flags, + adv = hci_add_per_instance(hdev, instance, sid, flags, data_len, data, sync_interval, sync_interval); diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index affa2077e3a2..3c2c98eecc62 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -336,7 +336,7 @@ static int iso_connect_bis(struct sock *sk) struct hci_dev *hdev; int err; - BT_DBG("%pMR", &iso_pi(sk)->src); + BT_DBG("%pMR (SID 0x%2.2x)", &iso_pi(sk)->src, iso_pi(sk)->bc_sid); hdev = hci_get_route(&iso_pi(sk)->dst, &iso_pi(sk)->src, iso_pi(sk)->src_type); @@ -365,7 +365,7 @@ static int iso_connect_bis(struct sock *sk) /* Just bind if DEFER_SETUP has been set */ if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { - hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, + hcon = hci_bind_bis(hdev, &iso_pi(sk)->dst, iso_pi(sk)->bc_sid, &iso_pi(sk)->qos, iso_pi(sk)->base_len, iso_pi(sk)->base); if (IS_ERR(hcon)) { @@ -375,12 +375,16 @@ static int iso_connect_bis(struct sock *sk) } else { hcon = hci_connect_bis(hdev, &iso_pi(sk)->dst, le_addr_type(iso_pi(sk)->dst_type), - &iso_pi(sk)->qos, iso_pi(sk)->base_len, - iso_pi(sk)->base); + iso_pi(sk)->bc_sid, &iso_pi(sk)->qos, + iso_pi(sk)->base_len, iso_pi(sk)->base); if (IS_ERR(hcon)) { err = PTR_ERR(hcon); goto unlock; } + + /* Update SID if it was not set */ + if (iso_pi(sk)->bc_sid == HCI_SID_INVALID) + iso_pi(sk)->bc_sid = hcon->sid; } conn = iso_conn_add(hcon); From 7dd38ba4acbea9875b4ee061e20a26413e39d9f4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 11 Jun 2025 16:36:27 -0400 Subject: [PATCH 180/497] Bluetooth: MGMT: Fix sparse errors This fixes the following errors: net/bluetooth/mgmt.c:5400:59: sparse: sparse: incorrect type in argument 3 (different base types) @@ expected unsigned short [usertype] handle @@ got restricted __le16 [usertype] monitor_handle @@ net/bluetooth/mgmt.c:5400:59: sparse: expected unsigned short [usertype] handle net/bluetooth/mgmt.c:5400:59: sparse: got restricted __le16 [usertype] monitor_handle Fixes: e6ed54e86aae ("Bluetooth: MGMT: Fix UAF on mgmt_remove_adv_monitor_complete") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506060347.ux2O1p7L-lkp@intel.com/ Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index de7adb9a47f9..d540f7b4f75f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5102,11 +5102,11 @@ static void mgmt_adv_monitor_added(struct sock *sk, struct hci_dev *hdev, } static void mgmt_adv_monitor_removed(struct sock *sk, struct hci_dev *hdev, - u16 handle) + __le16 handle) { struct mgmt_ev_adv_monitor_removed ev; - ev.monitor_handle = cpu_to_le16(handle); + ev.monitor_handle = handle; mgmt_event(MGMT_EV_ADV_MONITOR_REMOVED, hdev, &ev, sizeof(ev), sk); } From 47096d301e39f96962ca1fd6c7b71bfa796c53db Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Jun 2025 11:05:16 -1000 Subject: [PATCH 181/497] sched_ext: Update mailing list entry in MAINTAINERS Use sched-ext@lists.linux.dev instead of LKML. Signed-off-by: Tejun Heo --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..8841f24d408b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22171,7 +22171,7 @@ R: Tejun Heo R: David Vernet R: Andrea Righi R: Changwoo Min -L: linux-kernel@vger.kernel.org +L: sched-ext@lists.linux.dev S: Maintained W: https://github.com/sched-ext/scx T: git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext.git From f37258133c1e95e61db532e14067e28b4881bf24 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 10 Jun 2025 18:15:06 +0300 Subject: [PATCH 182/497] net/mlx5: Ensure fw pages are always allocated on same NUMA When firmware asks the driver to allocate more pages, using event of give_pages, the driver should always allocate it from same NUMA, the original device NUMA. Current code uses dev_to_node() which can result in different NUMA as it is changed by other driver flows, such as mlx5_dma_zalloc_coherent_node(). Instead, use saved numa node for allocating firmware pages. Fixes: 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node") Signed-off-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-2-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 972e8e9df585..9bc9bd83c232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); - int nid = dev_to_node(device); + int nid = dev->priv.numa_node; struct page *page; u64 zero_addr = 1; u64 addr; From 687560d8a9a2d654829ad0da1ec24242f1de711d Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 10 Jun 2025 18:15:07 +0300 Subject: [PATCH 183/497] net/mlx5: Fix ECVF vports unload on shutdown flow Fix shutdown flow UAF when a virtual function is created on the embedded chip (ECVF) of a BlueField device. In such case the vport acl ingress table is not properly destroyed. ECVF functionality is independent of ecpf_vport_exists capability and thus functions mlx5_eswitch_(enable|disable)_pf_vf_vports() should not test it when enabling/disabling ECVF vports. kernel log: [] refcount_t: underflow; use-after-free. [] WARNING: CPU: 3 PID: 1 at lib/refcount.c:28 refcount_warn_saturate+0x124/0x220 ---------------- [] Call trace: [] refcount_warn_saturate+0x124/0x220 [] tree_put_node+0x164/0x1e0 [mlx5_core] [] mlx5_destroy_flow_table+0x98/0x2c0 [mlx5_core] [] esw_acl_ingress_table_destroy+0x28/0x40 [mlx5_core] [] esw_acl_ingress_lgcy_cleanup+0x80/0xf4 [mlx5_core] [] esw_legacy_vport_acl_cleanup+0x44/0x60 [mlx5_core] [] esw_vport_cleanup+0x64/0x90 [mlx5_core] [] mlx5_esw_vport_disable+0xc0/0x1d0 [mlx5_core] [] mlx5_eswitch_unload_ec_vf_vports+0xcc/0x150 [mlx5_core] [] mlx5_eswitch_disable_sriov+0x198/0x2a0 [mlx5_core] [] mlx5_device_disable_sriov+0xb8/0x1e0 [mlx5_core] [] mlx5_sriov_detach+0x40/0x50 [mlx5_core] [] mlx5_unload+0x40/0xc4 [mlx5_core] [] mlx5_unload_one_devl_locked+0x6c/0xe4 [mlx5_core] [] mlx5_unload_one+0x3c/0x60 [mlx5_core] [] shutdown+0x7c/0xa4 [mlx5_core] [] pci_device_shutdown+0x3c/0xa0 [] device_shutdown+0x170/0x340 [] __do_sys_reboot+0x1f4/0x2a0 [] __arm64_sys_reboot+0x2c/0x40 [] invoke_syscall+0x78/0x100 [] el0_svc_common.constprop.0+0x54/0x184 [] do_el0_svc+0x30/0xac [] el0_svc+0x48/0x160 [] el0t_64_sync_handler+0xa4/0x12c [] el0t_64_sync+0x1a4/0x1a8 [] --[ end trace 9c4601d68c70030e ]--- Fixes: a7719b29a821 ("net/mlx5: Add management of EC VF vports") Reviewed-by: Daniel Jurgens Reviewed-by: Moshe Shemesh Signed-off-by: Amir Tzin Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-3-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7fb8a3381f84..4917d185d0c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, - enabled_events); - if (ret) - goto ec_vf_err; - } + } + + /* Enable ECVF vports */ + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; } /* Enable VF vports */ @@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs); + if (mlx5_ecpf_vport_exists(esw->dev)) { - if (mlx5_core_ec_sriov_enabled(esw->dev)) - mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } From 8ec40e3f1f72bf8f8accf18020d487caa99f46a4 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Tue, 10 Jun 2025 18:15:08 +0300 Subject: [PATCH 184/497] net/mlx5: Fix return value when searching for existing flow group When attempting to add a rule to an existing flow group, if a matching flow group exists but is not active, the error code returned should be EAGAIN, so that the rule can be added to the matching flow group once it is active, rather than ENOENT, which indicates that no matching flow group was found. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Gavi Teitz Signed-off-by: Roi Dayan Signed-off-by: Patrisious Haddad Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-4-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 23a7e8e7adfa..a8046200d376 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2228,6 +2228,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; struct match_list *iter; bool take_write = false; + bool try_again = false; struct fs_fte *fte; u64 version = 0; int err; @@ -2292,6 +2293,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); if (!g->node.active) { + try_again = true; up_write_ref_node(&g->node, false); continue; } @@ -2313,7 +2315,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, tree_put_node(&fte->node, false); return rule; } - rule = ERR_PTR(-ENOENT); + err = try_again ? -EAGAIN : -ENOENT; + rule = ERR_PTR(err); out: kmem_cache_free(steering->ftes_cache, fte); return rule; From a002602676cdae0c9996adb75b9310559b718a93 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Tue, 10 Jun 2025 18:15:09 +0300 Subject: [PATCH 185/497] net/mlx5: HWS, Init mutex on the correct path The newly introduced mutex is only used for reformat actions, but it was initialized for modify header instead. The struct that contains the mutex is zero-initialized and an all-zero mutex is valid, so the issue only shows up with CONFIG_DEBUG_MUTEXES. Fixes: b206d9ec19df ("net/mlx5: HWS, register reformat actions with fw") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-5-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 9d1c0e4b224a..372e2be90706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -1357,6 +1357,7 @@ mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, pkt_reformat->fs_hws_action.pr_data = pr_data; } + mutex_init(&pkt_reformat->fs_hws_action.lock); pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS; pkt_reformat->fs_hws_action.hws_action = hws_action; return 0; @@ -1503,7 +1504,6 @@ static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns, err = -ENOMEM; goto release_mh; } - mutex_init(&modify_hdr->fs_hws_action.lock); modify_hdr->fs_hws_action.mh_data = mh_data; modify_hdr->fs_hws_action.fs_pool = pool; modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; From b5e3c76f35ee7e814c2469c73406c5bbf110d89c Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 10 Jun 2025 18:15:10 +0300 Subject: [PATCH 186/497] net/mlx5: HWS, fix missing ip_version handling in definer Fix missing field handling in definer - outer IP version. Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-6-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index 5cc0dc002ac1..d45e1145d197 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -785,6 +785,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, outer_headers.ip_protocol, eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_O, + outer_headers.ip_version, + eth_l3_outer.ip_version); HWS_SET_HDR(fc, match_param, IP_TTL_O, outer_headers.ttl_hoplimit, eth_l3_outer.time_to_live_hop_limit); From b8335829518ec5988294280e37d735799209d70d Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Tue, 10 Jun 2025 18:15:11 +0300 Subject: [PATCH 187/497] net/mlx5: HWS, make sure the uplink is the last destination When there are more than one destinations, we create a FW flow table and provide it with all the destinations. FW requires to have wire as the last destination in the list (if it exists), otherwise the operation fails with FW syndrome. This patch fixes the destination array action creation: if it contains a wire destination, it is moved to the end. Fixes: 504e536d9010 ("net/mlx5: HWS, added actions handling") Signed-off-by: Vlad Dogaru Reviewed-by: Yevgeny Kliteynik Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-7-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/action.c | 14 +++++++------- .../mellanox/mlx5/core/steering/hws/fs_hws.c | 3 +++ .../mellanox/mlx5/core/steering/hws/mlx5hws.h | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index fb62f3bc4bd4..447ea3f8722c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -1370,8 +1370,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; struct mlx5hws_cmd_forward_tbl *fw_island; struct mlx5hws_action *action; - u32 i /*, packet_reformat_id*/; - int ret; + int ret, last_dest_idx = -1; + u32 i; if (num_dest <= 1) { mlx5hws_err(ctx, "Action must have multiple dests\n"); @@ -1401,11 +1401,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte_attr.ignore_flow_level = ignore_flow_level; - /* ToDo: In SW steering we have a handling of 'go to WIRE' - * destination here by upper layer setting 'is_wire_ft' flag - * if the destination is wire. - * This is because uplink should be last dest in the list. - */ + if (dests[i].is_wire_ft) + last_dest_idx = i; break; case MLX5HWS_ACTION_TYP_VPORT: dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -1429,6 +1426,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, } } + if (last_dest_idx != -1) + swap(dest_list[last_dest_idx], dest_list[num_dest - 1]); + fte_attr.dests_num = num_dest; fte_attr.dests = dest_list; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 372e2be90706..bf4643d0ce17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -966,6 +966,9 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, switch (attr->type) { case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst); + if (dst->dest_attr.ft->flags & + MLX5_FLOW_TABLE_UPLINK_VPORT) + dest_actions[num_dest_actions].is_wire_ft = true; break; case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 9bbadc4d8a0b..d8ac6c196211 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -213,6 +213,7 @@ struct mlx5hws_action_dest_attr { struct mlx5hws_action *dest; /* Optional reformat action */ struct mlx5hws_action *reformat; + bool is_wire_ft; }; /** From aa9c44b842096c553871bc68a8cebc7861fa192b Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 10 Jun 2025 18:15:13 +0300 Subject: [PATCH 188/497] net/mlx5e: Fix leak of Geneve TLV option object Previously, a unique tunnel id was added for the matching on TC non-zero chains, to support inner header rewrite with goto action. Later, it was used to support VF tunnel offload for vxlan, then for Geneve and GRE. To support VF tunnel, a temporary mlx5_flow_spec is used to parse tunnel options. For Geneve, if there is TLV option, a object is created, or refcnt is added if already exists. But the temporary mlx5_flow_spec is directly freed after parsing, which causes the leak because no information regarding the object is saved in flow's mlx5_flow_spec, which is used to free the object when deleting the flow. To fix the leak, call mlx5_geneve_tlv_option_del() before free the temporary spec if it has TLV object. Fixes: 521933cdc4aa ("net/mlx5e: Support Geneve and GRE with VF tunnel offload") Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Reviewed-by: Alex Lazar Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-9-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f1d908f61134..fef418e1ed1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2028,9 +2028,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return err; } -static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec) { - struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -2069,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } complete_all(&flow->del_hw_done); - if (mlx5_flow_has_geneve_opt(flow)) + if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); if (flow->decap_route) @@ -2574,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); if (err) { - kvfree(tmp_spec); NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); - return err; + } else { + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); } - err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + if (mlx5_flow_has_geneve_opt(tmp_spec)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); kvfree(tmp_spec); if (err) return err; From 875d7c160d60616ff06dedb70470ad199661efe7 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Tue, 10 Jun 2025 18:15:14 +0300 Subject: [PATCH 189/497] net/mlx5e: Fix number of lanes to UNKNOWN when using data_rate_oper When the link is up, either eth_proto_oper or ext_eth_proto_oper typically reports the active link protocol, from which both speed and number of lanes can be retrieved. However, in certain cases, such as when a NIC is connected via a non-standard cable, the firmware may not report the protocol. In such scenarios, the speed can still be obtained from the data_rate_oper field in PTYS register. Since data_rate_oper provides only speed information and lacks lane details, it is incorrect to derive the number of lanes from it. This patch corrects the behavior by setting the number of lanes to UNKNOWN instead of incorrectly using MAX_LANES when relying on data_rate_oper. Fixes: 7e959797f021 ("net/mlx5e: Enable lanes configuration when auto-negotiation is off") Signed-off-by: Shahar Shitrit Reviewed-by: Tariq Toukan Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250610151514.1094735-10-mbloch@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index ea078c9f5d15..3cb8d3bf9044 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -43,7 +43,6 @@ #include "en/fs_ethtool.h" #define LANES_UNKNOWN 0 -#define MAX_LANES 8 void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) @@ -1098,10 +1097,8 @@ static void get_link_properties(struct net_device *netdev, speed = info->speed; lanes = info->lanes; duplex = DUPLEX_FULL; - } else if (data_rate_oper) { + } else if (data_rate_oper) speed = 100 * data_rate_oper; - lanes = MAX_LANES; - } out: link_ksettings->base.duplex = duplex; From 403d1338a4a59cfebb4ded53fa35fbd5119f36b1 Mon Sep 17 00:00:00 2001 From: Magnus Lindholm Date: Tue, 18 Feb 2025 18:55:14 +0100 Subject: [PATCH 190/497] mm: pgtable: fix pte_swp_exclusive Make pte_swp_exclusive return bool instead of int. This will better reflect how pte_swp_exclusive is actually used in the code. This fixes swap/swapoff problems on Alpha due pte_swp_exclusive not returning correct values when _PAGE_SWP_EXCLUSIVE bit resides in upper 32-bits of PTE (like on alpha). Suggested-by: Al Viro Signed-off-by: Magnus Lindholm Cc: Sam James Link: https://lore.kernel.org/lkml/20250218175735.19882-2-linmag7@gmail.com/ Link: https://lore.kernel.org/lkml/20250602041118.GA2675383@ZenIV/ [ Applied as the 'sed' script Al suggested - Linus ] Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 2 +- arch/arc/include/asm/pgtable-bits-arcv2.h | 2 +- arch/arm/include/asm/pgtable.h | 2 +- arch/arm64/include/asm/pgtable.h | 2 +- arch/csky/include/asm/pgtable.h | 2 +- arch/hexagon/include/asm/pgtable.h | 2 +- arch/loongarch/include/asm/pgtable.h | 2 +- arch/m68k/include/asm/mcf_pgtable.h | 2 +- arch/m68k/include/asm/motorola_pgtable.h | 2 +- arch/m68k/include/asm/sun3_pgtable.h | 2 +- arch/microblaze/include/asm/pgtable.h | 2 +- arch/mips/include/asm/pgtable.h | 4 ++-- arch/nios2/include/asm/pgtable.h | 2 +- arch/openrisc/include/asm/pgtable.h | 2 +- arch/parisc/include/asm/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/32/pgtable.h | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 2 +- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- arch/riscv/include/asm/pgtable.h | 2 +- arch/s390/include/asm/pgtable.h | 2 +- arch/sh/include/asm/pgtable_32.h | 2 +- arch/sparc/include/asm/pgtable_32.h | 2 +- arch/sparc/include/asm/pgtable_64.h | 2 +- arch/um/include/asm/pgtable.h | 2 +- arch/x86/include/asm/pgtable.h | 2 +- arch/xtensa/include/asm/pgtable.h | 2 +- 26 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 2676017f42f1..44e7aedac6e8 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -327,7 +327,7 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h index 8ebec1b21d24..3084c53f402d 100644 --- a/arch/arc/include/asm/pgtable-bits-arcv2.h +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h @@ -130,7 +130,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 7f1c3b4e3e04..86378eec7757 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -301,7 +301,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(swp) __pte((swp).val) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_isset(pte, L_PTE_SWP_EXCLUSIVE); } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 88db8a0c0b37..192d86e1cc76 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -563,7 +563,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & PTE_SWP_EXCLUSIVE; } diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index b8378431aeff..5a394be09c35 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -200,7 +200,7 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 9fbdfdbc539f..fbf24d1d1ca6 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -387,7 +387,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) (((type & 0x1f) << 1) | \ ((offset & 0x3ffff8) << 10) | ((offset & 0x7) << 7)) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index a3f17914dbab..b30185302c07 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -301,7 +301,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) #define __swp_entry_to_pmd(x) ((pmd_t) { (x).val | _PAGE_HUGE }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index f5c596b211d4..d79fef609194 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -268,7 +268,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) (__pte((x).val)) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 040ac3bad713..14fee64d3e60 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -185,7 +185,7 @@ extern pgd_t kernel_pg_dir[128]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/m68k/include/asm/sun3_pgtable.h b/arch/m68k/include/asm/sun3_pgtable.h index 73745dc0ec0e..858cbe936f5b 100644 --- a/arch/m68k/include/asm/sun3_pgtable.h +++ b/arch/m68k/include/asm/sun3_pgtable.h @@ -169,7 +169,7 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index b1bb2c65dd04..bae1abfa6f6b 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -398,7 +398,7 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 2 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 2 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 4852b005a72d..ae73ecf4c41a 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -534,7 +534,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #endif #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } @@ -551,7 +551,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } #else -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index e98578e27e26..844dce55569f 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -259,7 +259,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 71bfb8c8c482..5bd6463bd514 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -411,7 +411,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 80f5e2a28413..1a86a4370b29 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -425,7 +425,7 @@ static inline void set_ptes(struct mm_struct *mm, unsigned long addr, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 42c3af90d1f0..92d21c6faf1e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -365,7 +365,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 6ed93e290c2f..a2ddcbb3fcb9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -693,7 +693,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SWP_EXCLUSIVE)); } diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 8d1f0b7062eb..7d6b9e5b286e 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -286,7 +286,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index a11816bbf9e7..438ce7df24c3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -1028,7 +1028,7 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1c661ac62ce8..6d8bc27a366e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -915,7 +915,7 @@ static inline int pmd_protnone(pmd_t pmd) } #endif -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/sh/include/asm/pgtable_32.h b/arch/sh/include/asm/pgtable_32.h index 71b18741cc11..5f51af18997b 100644 --- a/arch/sh/include/asm/pgtable_32.h +++ b/arch/sh/include/asm/pgtable_32.h @@ -470,7 +470,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) /* In both cases, we borrow bit 6 to store the exclusive marker in swap PTEs. */ #define _PAGE_SWP_EXCLUSIVE _PAGE_USER -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte.pte_low & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 1454ebe91539..7c199c003ffe 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -348,7 +348,7 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & SRMMU_SWP_EXCLUSIVE; } diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 4af03e3c161b..669cd02469a1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1023,7 +1023,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index ca2a519d53ab..24fdea6f88c3 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -314,7 +314,7 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr); ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_get_bits(pte, _PAGE_SWP_EXCLUSIVE); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 774430c3abff..97954c936c54 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1561,7 +1561,7 @@ static inline pte_t pte_swp_mkexclusive(pte_t pte) return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); } -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_flags(pte) & _PAGE_SWP_EXCLUSIVE; } diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index cb1725c40e36..d62aa1c316fc 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -349,7 +349,7 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -static inline int pte_swp_exclusive(pte_t pte) +static inline bool pte_swp_exclusive(pte_t pte) { return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; } From ba9db6f907ac02215e30128770f85fbd7db2fcf9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 17:12:44 -0700 Subject: [PATCH 191/497] net: clear the dst when changing skb protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A not-so-careful NAT46 BPF program can crash the kernel if it indiscriminately flips ingress packets from v4 to v6: BUG: kernel NULL pointer dereference, address: 0000000000000000 ip6_rcv_core (net/ipv6/ip6_input.c:190:20) ipv6_rcv (net/ipv6/ip6_input.c:306:8) process_backlog (net/core/dev.c:6186:4) napi_poll (net/core/dev.c:6906:9) net_rx_action (net/core/dev.c:7028:13) do_softirq (kernel/softirq.c:462:3) netif_rx (net/core/dev.c:5326:3) dev_loopback_xmit (net/core/dev.c:4015:2) ip_mc_finish_output (net/ipv4/ip_output.c:363:8) NF_HOOK (./include/linux/netfilter.h:314:9) ip_mc_output (net/ipv4/ip_output.c:400:5) dst_output (./include/net/dst.h:459:9) ip_local_out (net/ipv4/ip_output.c:130:9) ip_send_skb (net/ipv4/ip_output.c:1496:8) udp_send_skb (net/ipv4/udp.c:1040:8) udp_sendmsg (net/ipv4/udp.c:1328:10) The output interface has a 4->6 program attached at ingress. We try to loop the multicast skb back to the sending socket. Ingress BPF runs as part of netif_rx(), pushes a valid v6 hdr and changes skb->protocol to v6. We enter ip6_rcv_core which tries to use skb_dst(). But the dst is still an IPv4 one left after IPv4 mcast output. Clear the dst in all BPF helpers which change the protocol. Try to preserve metadata dsts, those may carry non-routing metadata. Cc: stable@vger.kernel.org Reviewed-by: Maciej Żenczykowski Acked-by: Daniel Borkmann Fixes: d219df60a70e ("bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()") Fixes: 1b00e0dfe7d0 ("bpf: update skb->protocol in bpf_skb_net_grow") Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250610001245.1981782-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/filter.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 327ca73f9cd7..7a72f766aacf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3233,6 +3233,13 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) +{ + skb->protocol = htons(proto); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); +} + static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with len as headroom, @@ -3329,7 +3336,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); skb_clear_hash(skb); return 0; @@ -3359,7 +3366,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) } } - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); skb_clear_hash(skb); return 0; @@ -3550,10 +3557,10 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); } if (skb_is_gso(skb)) { @@ -3606,10 +3613,10 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, /* Match skb->protocol to new outer l3 protocol */ if (skb->protocol == htons(ETH_P_IP) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) - skb->protocol = htons(ETH_P_IPV6); + bpf_skb_change_protocol(skb, ETH_P_IPV6); else if (skb->protocol == htons(ETH_P_IPV6) && flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) - skb->protocol = htons(ETH_P_IP); + bpf_skb_change_protocol(skb, ETH_P_IP); if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); From 567766954b2d5d6c29f568ad4382d6351ea48079 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Jun 2025 17:12:45 -0700 Subject: [PATCH 192/497] selftests: net: add test case for NAT46 looping back dst Simple test for crash involving multicast loopback and stale dst. Reuse exising NAT46 program. Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250610001245.1981782-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/nat6to4.sh | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100755 tools/testing/selftests/net/nat6to4.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ea84b88bcb30..ab996bd22a5f 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -27,6 +27,7 @@ TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh TEST_PROGS += udpgro_frglist.sh +TEST_PROGS += nat6to4.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh new file mode 100755 index 000000000000..0ee859b622a4 --- /dev/null +++ b/tools/testing/selftests/net/nat6to4.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NS="ns-peer-$(mktemp -u XXXXXX)" + +ip netns add "${NS}" +ip -netns "${NS}" link set lo up +ip -netns "${NS}" route add default via 127.0.0.2 dev lo + +tc -n "${NS}" qdisc add dev lo ingress +tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \ + bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action + +ip netns exec "${NS}" \ + bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1' From 27cea0e419d2f9dc6f51bbce5a44c70bc3774b9a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 10 Jun 2025 23:56:58 +0000 Subject: [PATCH 193/497] MAINTAINERS: Update Kuniyuki Iwashima's email address. I left Amazon and joined Google, so let's map the email addresses accordingly. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250610235734.88540-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- .mailmap | 3 +++ MAINTAINERS | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index 074082ce9299..7890b69a9b6e 100644 --- a/.mailmap +++ b/.mailmap @@ -424,6 +424,9 @@ Krzysztof Wilczyński Krzysztof Wilczyński Kshitiz Godara Kuninori Morimoto +Kuniyuki Iwashima +Kuniyuki Iwashima +Kuniyuki Iwashima Kuogee Hsieh Lee Jones Lee Jones diff --git a/MAINTAINERS b/MAINTAINERS index f2668b81115c..bd09337f178d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17420,7 +17420,7 @@ F: tools/testing/selftests/net/srv6* NETWORKING [TCP] M: Eric Dumazet M: Neal Cardwell -R: Kuniyuki Iwashima +R: Kuniyuki Iwashima L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -17450,7 +17450,7 @@ F: net/tls/* NETWORKING [SOCKETS] M: Eric Dumazet -M: Kuniyuki Iwashima +M: Kuniyuki Iwashima M: Paolo Abeni M: Willem de Bruijn S: Maintained @@ -17465,7 +17465,7 @@ F: net/core/scm.c F: net/socket.c NETWORKING [UNIX SOCKETS] -M: Kuniyuki Iwashima +M: Kuniyuki Iwashima S: Maintained F: include/net/af_unix.h F: include/net/netns/unix.h From 903cc7096db22f889d48e2cee8840709ce04fdac Mon Sep 17 00:00:00 2001 From: Akhil R Date: Tue, 3 Jun 2025 21:00:20 +0530 Subject: [PATCH 194/497] dt-bindings: i2c: nvidia,tegra20-i2c: Specify the required properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specify the properties which are essential and which are not for the Tegra I2C driver to function correctly. This was not added correctly when the TXT binding was converted to yaml. All the existing DT nodes have these properties already and hence this does not break the ABI. dmas and dma-names which were specified as a must in the TXT binding is now made optional since the driver can work in PIO mode if dmas are missing. Fixes: f10a9b722f80 ("dt-bindings: i2c: tegra: Convert to json-schema”) Signed-off-by: Akhil R Cc: # v5.17+ Reviewed-by: Krzysztof Kozlowski Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20250603153022.39434-1-akhilrajeev@nvidia.com --- .../bindings/i2c/nvidia,tegra20-i2c.yaml | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml index b57ae6963e62..6b6f6762d122 100644 --- a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.yaml @@ -97,7 +97,10 @@ properties: resets: items: - - description: module reset + - description: + Module reset. This property is optional for controllers in Tegra194, + Tegra234 etc where an internal software reset is available as an + alternative. reset-names: items: @@ -116,6 +119,13 @@ properties: - const: rx - const: tx +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + allOf: - $ref: /schemas/i2c/i2c-controller.yaml - if: @@ -169,6 +179,18 @@ allOf: properties: power-domains: false + - if: + not: + properties: + compatible: + contains: + enum: + - nvidia,tegra194-i2c + then: + required: + - resets + - reset-names + unevaluatedProperties: false examples: From ebf2e500e06f707654572bc7d8bc569a8caa51aa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 9 Jun 2025 12:38:43 +0530 Subject: [PATCH 195/497] rust: cpu: Introduce CpuId abstraction This adds abstraction for representing a CPU identifier. Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Reviewed-by: Boqun Feng --- rust/kernel/cpu.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs index 10c5c3b25873..6a3aecb12468 100644 --- a/rust/kernel/cpu.rs +++ b/rust/kernel/cpu.rs @@ -6,6 +6,116 @@ use crate::{bindings, device::Device, error::Result, prelude::ENODEV}; +/// Returns the maximum number of possible CPUs in the current system configuration. +#[inline] +pub fn nr_cpu_ids() -> u32 { + #[cfg(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS))] + { + bindings::NR_CPUS + } + + #[cfg(not(any(NR_CPUS_1, CONFIG_FORCE_NR_CPUS)))] + // SAFETY: `nr_cpu_ids` is a valid global provided by the kernel. + unsafe { + bindings::nr_cpu_ids + } +} + +/// The CPU ID. +/// +/// Represents a CPU identifier as a wrapper around an [`u32`]. +/// +/// # Invariants +/// +/// The CPU ID lies within the range `[0, nr_cpu_ids())`. +/// +/// # Examples +/// +/// ``` +/// use kernel::cpu::CpuId; +/// +/// let cpu = 0; +/// +/// // SAFETY: 0 is always a valid CPU number. +/// let id = unsafe { CpuId::from_u32_unchecked(cpu) }; +/// +/// assert_eq!(id.as_u32(), cpu); +/// assert!(CpuId::from_i32(0).is_some()); +/// assert!(CpuId::from_i32(-1).is_none()); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct CpuId(u32); + +impl CpuId { + /// Creates a new [`CpuId`] from the given `id` without checking bounds. + /// + /// # Safety + /// + /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`). + #[inline] + pub unsafe fn from_i32_unchecked(id: i32) -> Self { + debug_assert!(id >= 0); + debug_assert!((id as u32) < nr_cpu_ids()); + + // INVARIANT: The function safety guarantees `id` is a valid CPU id. + Self(id as u32) + } + + /// Creates a new [`CpuId`] from the given `id`, checking that it is valid. + pub fn from_i32(id: i32) -> Option { + if id < 0 || id as u32 >= nr_cpu_ids() { + None + } else { + // INVARIANT: `id` has just been checked as a valid CPU ID. + Some(Self(id as u32)) + } + } + + /// Creates a new [`CpuId`] from the given `id` without checking bounds. + /// + /// # Safety + /// + /// The caller must ensure that `id` is a valid CPU ID (i.e., `0 <= id < nr_cpu_ids()`). + #[inline] + pub unsafe fn from_u32_unchecked(id: u32) -> Self { + debug_assert!(id < nr_cpu_ids()); + + // Ensure the `id` fits in an [`i32`] as it's also representable that way. + debug_assert!(id <= i32::MAX as u32); + + // INVARIANT: The function safety guarantees `id` is a valid CPU id. + Self(id) + } + + /// Creates a new [`CpuId`] from the given `id`, checking that it is valid. + pub fn from_u32(id: u32) -> Option { + if id >= nr_cpu_ids() { + None + } else { + // INVARIANT: `id` has just been checked as a valid CPU ID. + Some(Self(id)) + } + } + + /// Returns CPU number. + #[inline] + pub fn as_u32(&self) -> u32 { + self.0 + } +} + +impl From for u32 { + fn from(id: CpuId) -> Self { + id.as_u32() + } +} + +impl From for i32 { + fn from(id: CpuId) -> Self { + id.as_u32() as i32 + } +} + /// Creates a new instance of CPU's device. /// /// # Safety From 47fe65b105f29ef22f463c17ab8a4c0eeb741045 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Jun 2025 20:22:17 -0400 Subject: [PATCH 196/497] bcachefs: Add missing restart handling to check_topology() The next patch will add logging of the specific error being corrected in repair paths to the journal; this means __bch2_fsck_err() can return transaction restarts in places that previously weren't expecting them. check_topology() is old code that doesn't use btree iterators for btree node locking - it'll have to be rewritten in the future to work online. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_gc.c | 95 ++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 35 deletions(-) diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 9ddcbe1bda78..e92cf3928c63 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -397,7 +397,11 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct continue; } - ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan); + ret = lockrestart_do(trans, + btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan)); + if (ret < 0) + goto err; + if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; @@ -438,7 +442,8 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct if (!ret && !IS_ERR_OR_NULL(prev)) { BUG_ON(cur); - ret = btree_repair_node_end(trans, b, prev, pulled_from_scan); + ret = lockrestart_do(trans, + btree_repair_node_end(trans, b, prev, pulled_from_scan)); if (ret == DID_FILL_FROM_SCAN) { new_pass = true; ret = 0; @@ -519,6 +524,46 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct bch2_bkey_buf_exit(&prev_k, c); bch2_bkey_buf_exit(&cur_k, c); printbuf_exit(&buf); + bch_err_fn(c, ret); + return ret; +} + +static int bch2_check_root(struct btree_trans *trans, enum btree_id i, + bool *reconstructed_root) +{ + struct bch_fs *c = trans->c; + struct btree_root *r = bch2_btree_id_root(c, i); + struct printbuf buf = PRINTBUF; + int ret = 0; + + bch2_btree_id_to_text(&buf, i); + + if (r->error) { + bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); + + r->alive = false; + r->error = 0; + + if (!bch2_btree_has_scanned_nodes(c, i)) { + __fsck_err(trans, + FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), + btree_root_unreadable_and_scan_found_nothing, + "no nodes found for btree %s, continue?", buf.buf); + bch2_btree_root_alloc_fake_trans(trans, i, 0); + } else { + bch2_btree_root_alloc_fake_trans(trans, i, 1); + bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); + ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); + if (ret) + goto err; + } + + *reconstructed_root = true; + } +err: +fsck_err: + printbuf_exit(&buf); + bch_err_fn(c, ret); return ret; } @@ -526,42 +571,18 @@ int bch2_check_topology(struct bch_fs *c) { struct btree_trans *trans = bch2_trans_get(c); struct bpos pulled_from_scan = POS_MIN; - struct printbuf buf = PRINTBUF; int ret = 0; bch2_trans_srcu_unlock(trans); for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) { - struct btree_root *r = bch2_btree_id_root(c, i); bool reconstructed_root = false; +recover: + ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root)); + if (ret) + break; - printbuf_reset(&buf); - bch2_btree_id_to_text(&buf, i); - - if (r->error) { -reconstruct_root: - bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf); - - r->alive = false; - r->error = 0; - - if (!bch2_btree_has_scanned_nodes(c, i)) { - __fsck_err(trans, - FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0), - btree_root_unreadable_and_scan_found_nothing, - "no nodes found for btree %s, continue?", buf.buf); - bch2_btree_root_alloc_fake_trans(trans, i, 0); - } else { - bch2_btree_root_alloc_fake_trans(trans, i, 1); - bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX); - ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX); - if (ret) - break; - } - - reconstructed_root = true; - } - + struct btree_root *r = bch2_btree_id_root(c, i); struct btree *b = r->b; btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); @@ -575,17 +596,21 @@ int bch2_check_topology(struct bch_fs *c) r->b = NULL; - if (!reconstructed_root) - goto reconstruct_root; + if (!reconstructed_root) { + r->error = -EIO; + goto recover; + } + struct printbuf buf = PRINTBUF; + bch2_btree_id_to_text(&buf, i); bch_err(c, "empty btree root %s", buf.buf); + printbuf_exit(&buf); bch2_btree_root_alloc_fake_trans(trans, i, 0); r->alive = false; ret = 0; } } -fsck_err: - printbuf_exit(&buf); + bch2_trans_put(trans); return ret; } From b43f724927686387589b98eb762e1a4109bb1bac Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 1 Jun 2025 13:07:31 -0400 Subject: [PATCH 197/497] bcachefs: Log fsck errors in the journal Log the specific error being corrected in the journal when we're repairing, this helps greatly with 'bcachefs list_journal' analysis. Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 63951e293c47..ff49cebfd0e8 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -620,6 +620,9 @@ int __bch2_fsck_err(struct bch_fs *c, if (s) s->ret = ret; + + if (trans) + ret = bch2_trans_log_str(trans, bch2_sb_error_strs[err]) ?: ret; err_unlock: mutex_unlock(&c->fsck_error_msgs_lock); err: From c7e351be7aa4e176223f0128e4d1b959ffad9598 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Jun 2025 19:00:11 -0400 Subject: [PATCH 198/497] bcachefs: Add range being updated to btree_update_to_text() We had a deadlock during recovery where interior btree updates became wedged and all open_buckets were consumed; start adding more introspection. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 26 ++++++++++++++++++++++++-- fs/bcachefs/btree_update_interior.h | 7 +++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index d2ecb782919b..340812b28d08 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1232,6 +1232,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, list_add_tail(&as->list, &c->btree_interior_update_list); mutex_unlock(&c->btree_interior_update_lock); + struct btree *b = btree_path_node(path, path->level); + as->node_start = b->data->min_key; + as->node_end = b->data->max_key; + as->node_needed_rewrite = btree_node_need_rewrite(b); + as->node_written = b->written; + as->node_sectors = btree_buf_bytes(b) >> 9; + as->node_remaining = __bch2_btree_u64s_remaining(b, + btree_bkey_last(b, bset_tree_last(b))); + /* * We don't want to allocate if we're in an error state, that can cause * deadlock on emergency shutdown due to open buckets getting stuck in @@ -2108,6 +2117,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, if (ret) goto err; + as->node_start = prev->data->min_key; + as->node_end = next->data->max_key; + trace_and_count(c, btree_node_merge, trans, b); n = bch2_btree_node_alloc(as, trans, b->c.level); @@ -2681,9 +2693,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update prt_str(out, " "); bch2_btree_id_to_text(out, as->btree_id); - prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", + prt_printf(out, " l=%u-%u ", as->update_level_start, - as->update_level_end, + as->update_level_end); + bch2_bpos_to_text(out, as->node_start); + prt_char(out, ' '); + bch2_bpos_to_text(out, as->node_end); + prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %u", + as->node_written, + as->node_sectors, + as->node_remaining, + as->node_needed_rewrite); + + prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", bch2_btree_update_modes[as->mode], as->nodes_written, closure_nr_remaining(&as->cl), diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 7fe793788a79..85682a13de4d 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -57,6 +57,13 @@ struct btree_update { unsigned took_gc_lock:1; enum btree_id btree_id; + struct bpos node_start; + struct bpos node_end; + bool node_needed_rewrite; + u16 node_written; + u16 node_sectors; + u16 node_remaining; + unsigned update_level_start; unsigned update_level_end; From b76cce12700b8bb8d59b9ff444504b94db96dd4b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 5 Jun 2025 20:53:01 -0400 Subject: [PATCH 199/497] bcachefs: Add more flags to btree nodes for rewrite reason It seems excessive forced btree node rewrites can cause interior btree updates to become wedged during recovery, before we're using the write buffer for backpointer updates. Add more flags so we can determine where these are coming from. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 10 ++++++++-- fs/bcachefs/btree_types.h | 29 +++++++++++++++++++++++++++++ fs/bcachefs/btree_update_interior.c | 13 ++++++++++--- fs/bcachefs/btree_update_interior.h | 2 +- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 57eff3012a7b..6787d5b91eab 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1045,6 +1045,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, le16_add_cpu(&i->u64s, -next_good_key); memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_error(b); } fsck_err: printbuf_exit(&buf); @@ -1305,6 +1306,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_bset_end(b, b->set); set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_error(b); continue; } if (ret) @@ -1329,12 +1331,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - if (!ptr_written) + if (!ptr_written) { set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_ptr_written_zero(b); + } fsck_err: mempool_free(iter, &c->fill_iter); printbuf_exit(&buf); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index c61c4171ae50..3aa4a602bd02 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -617,6 +617,9 @@ enum btree_write_type { x(dying) \ x(fake) \ x(need_rewrite) \ + x(need_rewrite_error) \ + x(need_rewrite_degraded) \ + x(need_rewrite_ptr_written_zero) \ x(never_write) \ x(pinned) @@ -641,6 +644,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \ BTREE_FLAGS() #undef x +#define BTREE_NODE_REWRITE_REASON() \ + x(none) \ + x(unknown) \ + x(error) \ + x(degraded) \ + x(ptr_written_zero) + +enum btree_node_rewrite_reason { +#define x(n) BTREE_NODE_REWRITE_##n, + BTREE_NODE_REWRITE_REASON() +#undef x +}; + +static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b) +{ + if (btree_node_need_rewrite_ptr_written_zero(b)) + return BTREE_NODE_REWRITE_ptr_written_zero; + if (btree_node_need_rewrite_degraded(b)) + return BTREE_NODE_REWRITE_degraded; + if (btree_node_need_rewrite_error(b)) + return BTREE_NODE_REWRITE_error; + if (btree_node_need_rewrite(b)) + return BTREE_NODE_REWRITE_unknown; + return BTREE_NODE_REWRITE_none; +} + static inline struct btree_write *btree_current_write(struct btree *b) { return b->writes + btree_node_write_idx(b); diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 340812b28d08..e77584607f0d 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1138,6 +1138,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans * start_time); } +static const char * const btree_node_reawrite_reason_strs[] = { +#define x(n) #n, + BTREE_NODE_REWRITE_REASON() +#undef x + NULL, +}; + static struct btree_update * bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, unsigned level_start, bool split, @@ -1235,7 +1242,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, struct btree *b = btree_path_node(path, path->level); as->node_start = b->data->min_key; as->node_end = b->data->max_key; - as->node_needed_rewrite = btree_node_need_rewrite(b); + as->node_needed_rewrite = btree_node_rewrite_reason(b); as->node_written = b->written; as->node_sectors = btree_buf_bytes(b) >> 9; as->node_remaining = __bch2_btree_u64s_remaining(b, @@ -2699,11 +2706,11 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update bch2_bpos_to_text(out, as->node_start); prt_char(out, ' '); bch2_bpos_to_text(out, as->node_end); - prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %u", + prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %s", as->node_written, as->node_sectors, as->node_remaining, - as->node_needed_rewrite); + btree_node_reawrite_reason_strs[as->node_needed_rewrite]); prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n", bch2_btree_update_modes[as->mode], diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 85682a13de4d..b649c36c3fbb 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -59,7 +59,7 @@ struct btree_update { enum btree_id btree_id; struct bpos node_start; struct bpos node_end; - bool node_needed_rewrite; + enum btree_node_rewrite_reason node_needed_rewrite; u16 node_written; u16 node_sectors; u16 node_remaining; From af5b88618a38371545251ad92ce42bc4bfa1142a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 12:01:22 -0400 Subject: [PATCH 200/497] bcachefs: Update /dev/disk/by-uuid on device add Invalidate pagecache after we write the new superblock and send a uevent. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 397a69da5a75..8648f22411be 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1995,6 +1995,22 @@ int bch2_dev_add(struct bch_fs *c, const char *path) goto err_late; } + /* + * We just changed the superblock UUID, invalidate cache and send a + * uevent to update /dev/disk/by-uuid + */ + invalidate_bdev(ca->disk_sb.bdev); + + char uuid_str[37]; + snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); + + char *envp[] = { + "CHANGE=uuid", + uuid_str, + NULL, + }; + kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + up_write(&c->state_lock); out: printbuf_exit(&label); From 7b0e6b198e4ef39ec89285d953778c5a3a04d390 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 12:56:15 -0400 Subject: [PATCH 201/497] bcachefs: Mark need_discard_freespace_key_bad autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 6fdbf265e4c0..90c878f289b2 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -134,7 +134,7 @@ enum bch_fsck_flags { x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ - x(need_discard_freespace_key_bad, 124, 0) \ + x(need_discard_freespace_key_bad, 124, FSCK_AUTOFIX) \ x(discarding_bucket_not_in_need_discard_btree, 291, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_level_bad, 294, 0) \ From b47a82ff4772ea9d7091b85ef5f34dc78c866a02 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 12:56:33 -0400 Subject: [PATCH 202/497] bcachefs: Only run 'increase_depth' for keys from btree node csan bch2_btree_increase_depth() was originally for disaster recovery, to get some data back from the journal when a btree root was bad. We don't need it for that purpose anymore; on bad btree root we'll launch btree node scan and reconstruct all the interior nodes. If there's a key in the journal for a depth that doesn't exists, and it's not from check_topology/btree node scan, we should just ignore it. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1e68e61f08e8..dbd066260725 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -271,13 +271,24 @@ static int bch2_journal_replay_key(struct btree_trans *trans, goto out; struct btree_path *path = btree_iter_path(trans, &iter); - if (unlikely(!btree_path_node(path, k->level))) { + if (unlikely(!btree_path_node(path, k->level) && + !k->allocated)) { + struct bch_fs *c = trans->c; + + if (!(c->recovery.passes_complete & (BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes)| + BIT_ULL(BCH_RECOVERY_PASS_check_topology)))) { + bch_err(c, "have key in journal replay for btree depth that does not exist, confused"); + ret = -EINVAL; + } +#if 0 bch2_trans_iter_exit(trans, &iter); bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p, BTREE_MAX_DEPTH, 0, iter_flags); ret = bch2_btree_iter_traverse(trans, &iter) ?: bch2_btree_increase_depth(trans, iter.path, 0) ?: -BCH_ERR_transaction_restart_nested; +#endif + k->overwritten = true; goto out; } From dd22844f48a71a414bd7e08cd4a9a5847974c07e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 14:22:24 -0400 Subject: [PATCH 203/497] bcachefs: Read error message now prints if self healing Signed-off-by: Kent Overstreet --- fs/bcachefs/io_read.c | 11 +++++++++-- fs/bcachefs/io_read.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index a77779afad01..04bbdcf58e40 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -343,6 +343,10 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, *bounce = true; *read_full = promote_full; + + if (have_io_error(failed)) + orig->self_healing = true; + return promote; nopromote: trace_io_read_nopromote(c, ret); @@ -635,12 +639,15 @@ static void bch2_rbio_retry(struct work_struct *work) prt_str(&buf, "(internal move) "); prt_str(&buf, "data read error, "); - if (!ret) + if (!ret) { prt_str(&buf, "successful retry"); - else + if (rbio->self_healing) + prt_str(&buf, ", self healing"); + } else prt_str(&buf, bch2_err_str(ret)); prt_newline(&buf); + if (!bkey_deleted(&sk.k->k)) { bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(sk.k)); prt_newline(&buf); diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h index 45c959018919..9c5ddbf861b3 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h @@ -44,6 +44,7 @@ struct bch_read_bio { have_ioref:1, narrow_crcs:1, saw_error:1, + self_healing:1, context:2; }; u16 _state; From 263561649ee5875a922f4358e96d3deb17a91021 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 14:27:35 -0400 Subject: [PATCH 204/497] bcachefs: Don't persistently run scan_for_btree_nodes bch2_btree_lost_data() gets called on btree node read error, but the error might be transient. btree_node_scan is expensive, and there's no need to run it persistently (marking it in the superblock as required to run) - check_topology will run it if required, via bch2_get_scanned_nodes(). Running it non-persistently is fine, to avoid check_topology having to rewind recovery to run it. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 2 ++ fs/bcachefs/recovery_passes.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index dbd066260725..06d0ba0fbffb 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -99,9 +99,11 @@ int bch2_btree_lost_data(struct bch_fs *c, goto out; case BTREE_ID_snapshots: ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_reconstruct_snapshots, 0) ?: ret; + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; default: + ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_topology, 0) ?: ret; ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_scan_for_btree_nodes, 0) ?: ret; goto out; } diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 605588e33fb3..35ac0d64d73a 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -294,8 +294,13 @@ static bool recovery_pass_needs_set(struct bch_fs *c, enum bch_run_recovery_pass_flags *flags) { struct bch_fs_recovery *r = &c->recovery; - bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); - bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); + + /* + * Never run scan_for_btree_nodes persistently: check_topology will run + * it if required + */ + if (pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) + *flags |= RUN_RECOVERY_PASS_nopersistent; if ((*flags & RUN_RECOVERY_PASS_ratelimit) && !bch2_recovery_pass_want_ratelimit(c, pass)) @@ -310,6 +315,8 @@ static bool recovery_pass_needs_set(struct bch_fs *c, * Otherwise, we run run_explicit_recovery_pass when we find damage, so * it should run again even if it's already run: */ + bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); + bool persistent = !in_recovery || !(*flags & RUN_RECOVERY_PASS_nopersistent); if (persistent ? !(c->sb.recovery_passes_required & BIT_ULL(pass)) @@ -334,6 +341,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, struct bch_fs_recovery *r = &c->recovery; int ret = 0; + lockdep_assert_held(&c->sb_lock); bch2_printbuf_make_room(out, 1024); @@ -446,7 +454,7 @@ int bch2_require_recovery_pass(struct bch_fs *c, int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) { - enum bch_run_recovery_pass_flags flags = RUN_RECOVERY_PASS_nopersistent; + enum bch_run_recovery_pass_flags flags = 0; if (!recovery_pass_needs_set(c, pass, &flags)) return 0; From 3315113af178041ab474723804e1322cee7d0a97 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 18:56:28 -0400 Subject: [PATCH 205/497] bcachefs: mark more errors autofix Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 90c878f289b2..d06e73884871 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -165,7 +165,7 @@ enum bch_fsck_flags { x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ - x(ptr_gen_newer_than_bucket_gen, 152, 0) \ + x(ptr_gen_newer_than_bucket_gen, 152, FSCK_AUTOFIX) \ x(ptr_too_stale, 153, 0) \ x(stale_dirty_ptr, 154, FSCK_AUTOFIX) \ x(ptr_bucket_data_type_mismatch, 155, 0) \ @@ -236,7 +236,7 @@ enum bch_fsck_flags { x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ - x(inode_has_child_snapshots_wrong, 287, 0) \ + x(inode_has_child_snapshots_wrong, 287, FSCK_AUTOFIX) \ x(inode_unreachable, 210, FSCK_AUTOFIX) \ x(inode_journal_seq_in_future, 299, FSCK_AUTOFIX) \ x(inode_i_sectors_underflow, 312, FSCK_AUTOFIX) \ @@ -279,8 +279,8 @@ enum bch_fsck_flags { x(root_dir_missing, 239, 0) \ x(root_inode_not_dir, 240, 0) \ x(dir_loop, 241, 0) \ - x(hash_table_key_duplicate, 242, 0) \ - x(hash_table_key_wrong_offset, 243, 0) \ + x(hash_table_key_duplicate, 242, FSCK_AUTOFIX) \ + x(hash_table_key_wrong_offset, 243, FSCK_AUTOFIX) \ x(unlinked_inode_not_on_deleted_list, 244, FSCK_AUTOFIX) \ x(reflink_p_front_pad_bad, 245, 0) \ x(journal_entry_dup_same_device, 246, 0) \ From 0acb385ec19c99b213d28a309a941790758c53a8 Mon Sep 17 00:00:00 2001 From: Alan Huang Date: Tue, 20 May 2025 15:34:28 +0800 Subject: [PATCH 206/497] bcachefs: Fix possible console lock involved deadlock Link: https://lore.kernel.org/all/6822ab02.050a0220.f2294.00cb.GAE@google.com/T/ Reported-by: syzbot+2c3ef91c9523c3d1a25c@syzkaller.appspotmail.com Signed-off-by: Alan Huang Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 - fs/bcachefs/btree_locking.c | 2 +- fs/bcachefs/error.c | 2 +- fs/bcachefs/super.c | 11 +++-------- fs/bcachefs/util.c | 10 ++-------- fs/bcachefs/util.h | 2 +- 6 files changed, 8 insertions(+), 20 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 3651a296d506..5a1cede2febf 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -296,7 +296,6 @@ do { \ #define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n") void bch2_print_str(struct bch_fs *, const char *, const char *); -void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *); __printf(2, 3) void bch2_print_opts(struct bch_opts *, const char *, ...); diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 47035aae232e..91a51aef82f1 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -213,7 +213,7 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g) prt_newline(&buf); } - bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf); + bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf); printbuf_exit(&buf); BUG(); } diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index ff49cebfd0e8..a8ec6aae5738 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -69,7 +69,7 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra if (trans) bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); - bch2_print_str_nonblocking(c, KERN_ERR, buf.buf); + bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); return ret; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 8648f22411be..ae8fa58a208d 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -104,7 +104,7 @@ const char * const bch2_dev_write_refs[] = { #undef x static void __bch2_print_str(struct bch_fs *c, const char *prefix, - const char *str, bool nonblocking) + const char *str) { #ifdef __KERNEL__ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); @@ -114,17 +114,12 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix, return; } #endif - bch2_print_string_as_lines(KERN_ERR, str, nonblocking); + bch2_print_string_as_lines(KERN_ERR, str); } void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) { - __bch2_print_str(c, prefix, str, false); -} - -void bch2_print_str_nonblocking(struct bch_fs *c, const char *prefix, const char *str) -{ - __bch2_print_str(c, prefix, str, true); + __bch2_print_str(c, prefix, str); } __printf(2, 0) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index dc3817f545fa..df9a6071fe18 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -262,8 +262,7 @@ static bool string_is_spaces(const char *str) return true; } -void bch2_print_string_as_lines(const char *prefix, const char *lines, - bool nonblocking) +void bch2_print_string_as_lines(const char *prefix, const char *lines) { bool locked = false; const char *p; @@ -273,12 +272,7 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines, return; } - if (!nonblocking) { - console_lock(); - locked = true; - } else { - locked = console_trylock(); - } + locked = console_trylock(); while (*lines) { p = strchrnul(lines, '\n'); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 0a4b1d433621..6488f098d140 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -214,7 +214,7 @@ u64 bch2_read_flag_list(const char *, const char * const[]); void bch2_prt_u64_base2_nbits(struct printbuf *, u64, unsigned); void bch2_prt_u64_base2(struct printbuf *, u64); -void bch2_print_string_as_lines(const char *, const char *, bool); +void bch2_print_string_as_lines(const char *, const char *); typedef DARRAY(unsigned long) bch_stacktrace; int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); From f946ce0be45e75801583a5371fccf7466961d9d0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 7 Jun 2025 20:18:16 -0400 Subject: [PATCH 207/497] bcachefs: Make sure opts.read_only gets propagated back to VFS If we think we're read-only but the VFS doesn't, fun will ensue. And now that we know we have to be able to do this safely, just make nochanges imply ro. Reported-by: syzbot+a7d6ceaba099cc21dee4@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 8 ++++++++ fs/bcachefs/recovery.c | 4 +++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 85d13f800165..3063a8ddc2df 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -2490,6 +2490,14 @@ static int bch2_fs_get_tree(struct fs_context *fc) if (ret) goto err_stop_fs; + /* + * We might be doing a RO mount because other options required it, or we + * have no alloc info and it's a small image with no room to regenerate + * it + */ + if (c->opts.read_only) + fc->sb_flags |= SB_RDONLY; + sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); ret = PTR_ERR_OR_ZERO(sb); if (ret) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 06d0ba0fbffb..520eb72c4971 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -752,9 +752,11 @@ int bch2_fs_recovery(struct bch_fs *c) ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read) : BCH_RECOVERY_PASS_snapshots_read; c->opts.nochanges = true; - c->opts.read_only = true; } + if (c->opts.nochanges) + c->opts.read_only = true; + mutex_lock(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); bool write_sb = false; From 757601ef853359fe2d57d75c00b5045f62efc608 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 11:40:00 -0400 Subject: [PATCH 208/497] bcachefs: Don't put rhashtable on stack Object debugging generally needs special provisions for putting said objects on the stack, which rhashtable does not have. Reported-by: syzbot+bcc38a9556d0324c2ec2@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/movinggc.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 6d7b1d5f7697..27e68d470ad0 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -28,7 +28,7 @@ #include struct buckets_in_flight { - struct rhashtable table; + struct rhashtable *table; struct move_bucket *first; struct move_bucket *last; size_t nr; @@ -98,7 +98,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, static void move_bucket_free(struct buckets_in_flight *list, struct move_bucket *b) { - int ret = rhashtable_remove_fast(&list->table, &b->hash, + int ret = rhashtable_remove_fast(list->table, &b->hash, bch_move_bucket_params); BUG_ON(ret); kfree(b); @@ -133,7 +133,7 @@ static void move_buckets_wait(struct moving_context *ctxt, static bool bucket_in_flight(struct buckets_in_flight *list, struct move_bucket_key k) { - return rhashtable_lookup_fast(&list->table, &k, bch_move_bucket_params); + return rhashtable_lookup_fast(list->table, &k, bch_move_bucket_params); } static int bch2_copygc_get_buckets(struct moving_context *ctxt, @@ -185,7 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, goto err; } - ret2 = rhashtable_lookup_insert_fast(&buckets_in_flight->table, &b_i->hash, + ret2 = rhashtable_lookup_insert_fast(buckets_in_flight->table, &b_i->hash, bch_move_bucket_params); BUG_ON(ret2); @@ -350,10 +350,13 @@ static int bch2_copygc_thread(void *arg) struct buckets_in_flight buckets = {}; u64 last, wait; - int ret = rhashtable_init(&buckets.table, &bch_move_bucket_params); + buckets.table = kzalloc(sizeof(*buckets.table), GFP_KERNEL); + int ret = !buckets.table + ? -ENOMEM + : rhashtable_init(buckets.table, &bch_move_bucket_params); bch_err_msg(c, ret, "allocating copygc buckets in flight"); if (ret) - return ret; + goto err; set_freezable(); @@ -421,11 +424,12 @@ static int bch2_copygc_thread(void *arg) } move_buckets_wait(&ctxt, &buckets, true); - rhashtable_destroy(&buckets.table); + rhashtable_destroy(buckets.table); bch2_moving_ctxt_exit(&ctxt); bch2_move_stats_exit(&move_stats, c); - - return 0; +err: + kfree(buckets.table); + return ret; } void bch2_copygc_stop(struct bch_fs *c) From 082c74411491f8b0d31465fc104b8342e66c4056 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 11:58:59 -0400 Subject: [PATCH 209/497] bcachefs: Fix downgrade_table_extra() Fix a UAF: we were calling darray_make_room() and retaining a pointer to the old buffer. And fix an UBSAN warning: struct bch_sb_field_downgrade_entry uses __counted_by, so set dst->nr_errors before assigning to the array entry. Reported-by: syzbot+14c52d86ddbd89bea13e@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-downgrade.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index b61f88450a6d..1506d05e0665 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -253,6 +253,7 @@ DOWNGRADE_TABLE() static int downgrade_table_extra(struct bch_fs *c, darray_char *table) { + unsigned dst_offset = table->nr; struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); int ret = 0; @@ -268,6 +269,9 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) if (ret) return ret; + dst = (void *) &table->data[dst_offset]; + dst->nr_errors = cpu_to_le16(nr_errors + 1); + /* open coded __set_bit_le64, as dst is packed and * dst->recovery_passes is misaligned */ unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations; @@ -278,7 +282,6 @@ static int downgrade_table_extra(struct bch_fs *c, darray_char *table) break; } - dst->nr_errors = cpu_to_le16(nr_errors); return ret; } From 54aacfe3976893ee04582a0bd61967bbee5a7e04 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 12:17:02 -0400 Subject: [PATCH 210/497] bcachefs: Fix rcu_pending for PREEMPT_RT PREEMPT_RT redefines how standard spinlocks work, so local_irq_save() + spin_lock() is no longer equivalent to spin_lock_irqsave(). Fortunately, we don't strictly need to do it that way. Signed-off-by: Kent Overstreet --- fs/bcachefs/rcu_pending.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/bcachefs/rcu_pending.c b/fs/bcachefs/rcu_pending.c index bef2aa1b8bcd..b1438be9d690 100644 --- a/fs/bcachefs/rcu_pending.c +++ b/fs/bcachefs/rcu_pending.c @@ -182,11 +182,6 @@ static inline void kfree_bulk(size_t nr, void ** p) while (nr--) kfree(*p); } - -#define local_irq_save(flags) \ -do { \ - flags = 0; \ -} while (0) #endif static noinline void __process_finished_items(struct rcu_pending *pending, @@ -429,9 +424,15 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN)); - local_irq_save(flags); - p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); + /* We could technically be scheduled before taking the lock and end up + * using a different cpu's rcu_pending_pcpu: that's ok, it needs a lock + * anyways + * + * And we have to do it this way to avoid breaking PREEMPT_RT, which + * redefines how spinlocks work: + */ + p = raw_cpu_ptr(pending->p); + spin_lock_irqsave(&p->lock, flags); rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu); restart: if (may_sleep && @@ -520,9 +521,8 @@ __rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head, goto free_node; } - local_irq_save(flags); - p = this_cpu_ptr(pending->p); - spin_lock(&p->lock); + p = raw_cpu_ptr(pending->p); + spin_lock_irqsave(&p->lock, flags); goto restart; } From 9e48f574e55731106b26dc33d8b0be1adedf3f20 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Jun 2025 17:30:40 -0400 Subject: [PATCH 211/497] bcachefs: Fix leak in bch2_fs_recovery() error path Fix a small leak of the superblock 'clean' section. Signed-off-by: Kent Overstreet --- fs/bcachefs/recovery.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 520eb72c4971..0b21fa6ff062 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -1108,9 +1108,6 @@ int bch2_fs_recovery(struct bch_fs *c) out: bch2_flush_fsck_errs(c); - if (!IS_ERR(clean)) - kfree(clean); - if (!ret && test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) && !c->opts.nochanges) { @@ -1119,6 +1116,9 @@ int bch2_fs_recovery(struct bch_fs *c) } bch_err_fn(c, ret); +final_out: + if (!IS_ERR(clean)) + kfree(clean); return ret; err: fsck_err: @@ -1132,7 +1132,7 @@ int bch2_fs_recovery(struct bch_fs *c) bch2_print_str(c, KERN_ERR, buf.buf); printbuf_exit(&buf); } - return ret; + goto final_out; } int bch2_fs_initialize(struct bch_fs *c) From c3dd25319c1818e067f41f41127f935f153498e6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 9 Jun 2025 17:28:00 -0400 Subject: [PATCH 212/497] bcachefs: Don't pass trans to fsck_err() in gc_accounting_done fsck_err() can return a transaction restart if passed a transaction object - this has always been true when it has to drop locks to prompt for user input, but we're seeing this more now that we're logging the error being corrected in the journal. gc_accounting_done() doesn't call fsck_err() from an actual commit loop, and it doesn't need to be holding btree locks when it calls fsck_err(), so the easy fix here for the unhandled transaction restart is to just not pass it the transaction object. We'll miss out on the fancy new logging, but that's ok. Signed-off-by: Kent Overstreet --- fs/bcachefs/disk_accounting.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index 3d59a57a5256..f7528cd69c73 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -618,7 +618,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) for (unsigned j = 0; j < nr; j++) src_v[j] -= dst_v[j]; - if (fsck_err(trans, accounting_mismatch, "%s", buf.buf)) { + bch2_trans_unlock_long(trans); + + if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) { percpu_up_write(&c->mark_lock); ret = commit_do(trans, NULL, NULL, 0, bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); From e82b3a63a9a91cc5c585efb3e28f32100f24e3e1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:24:04 +0200 Subject: [PATCH 213/497] bcachefs: ioctl: avoid stack overflow warning Multiple ioctl handlers individually use a lot of stack space, and clang chooses to inline them into the bch2_fs_ioctl() function, blowing through the warning limit: fs/bcachefs/chardev.c:655:6: error: stack frame size (1032) exceeds limit (1024) in 'bch2_fs_ioctl' [-Werror,-Wframe-larger-than] 655 | long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg) By marking the largest two of them as noinline_for_stack, no indidual code path ends up using this much, which avoids the warning and reduces the possible total stack usage in the ioctl handler. Signed-off-by: Arnd Bergmann Signed-off-by: Kent Overstreet --- fs/bcachefs/chardev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/chardev.c b/fs/bcachefs/chardev.c index 2d38466eddfd..fde3c2380e28 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -399,7 +399,7 @@ static long bch2_ioctl_data(struct bch_fs *c, return ret; } -static long bch2_ioctl_fs_usage(struct bch_fs *c, +static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, struct bch_ioctl_fs_usage __user *user_arg) { struct bch_ioctl_fs_usage arg = {}; @@ -469,7 +469,7 @@ static long bch2_ioctl_query_accounting(struct bch_fs *c, } /* obsolete, didn't allow for new data types: */ -static long bch2_ioctl_dev_usage(struct bch_fs *c, +static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { struct bch_ioctl_dev_usage arg; From 625c494db95624f09f5e7b81b64d4da34e45bd2a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Jun 2025 13:32:58 -0400 Subject: [PATCH 214/497] bcachefs: Fix version checks in validate_bset() It seems btree node scan picked up a partially overwritten btree node, and corrected the "bset version older than sb version_min" error - resulting in an invalid superblock with a bad version_min field. Don't run this check at all when we're in btree node scan, and when we do run it, do something saner if the bset version is totally crazy. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 6787d5b91eab..d8f3c4c65e90 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -741,16 +741,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BCH_VERSION_MAJOR(version), BCH_VERSION_MINOR(version)); - if (btree_err_on(version < c->sb.version_min, + if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes && + btree_err_on(version < c->sb.version_min, -BCH_ERR_btree_node_read_err_fixable, c, NULL, b, i, NULL, btree_node_bset_older_than_sb_min, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { - mutex_lock(&c->sb_lock); - c->disk_sb.sb->version_min = cpu_to_le16(version); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + if (bch2_version_compatible(version)) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = cpu_to_le16(version); + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } else { + /* We have no idea what's going on: */ + i->version = cpu_to_le16(c->sb.version); + } } if (btree_err_on(BCH_VERSION_MAJOR(version) > From 205da7c026739d965e605d39001049c7b6728e87 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 11:31:23 -0400 Subject: [PATCH 215/497] bcachefs: Don't trust sb->nr_devices in members_to_text() We have to be able to print superblock sections even if they fail to validate (for debugging), so we have to calculate the number of entries from the field size. Reported-by: syzbot+5138f00559ffb3cb3610@syzkaller.appspotmail.com Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-members.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 363eb0c6eb7c..6245e342a8a8 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -325,9 +325,17 @@ static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / sizeof(mi->_members[0]); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v1_get(mi, i), gi, sb, i); } @@ -341,9 +349,27 @@ static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, { struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); - unsigned i; - for (i = 0; i < sb->nr_devices; i++) + if (vstruct_end(&mi->field) <= (void *) &mi->_members[0]) { + prt_printf(out, "field ends before start of entries"); + return; + } + + if (!le16_to_cpu(mi->member_bytes)) { + prt_printf(out, "member_bytes 0"); + return; + } + + unsigned nr = (vstruct_end(&mi->field) - (void *) &mi->_members[0]) / le16_to_cpu(mi->member_bytes); + if (nr != sb->nr_devices) + prt_printf(out, "nr_devices mismatch: have %i entries, should be %u", nr, sb->nr_devices); + + /* + * We call to_text() on superblock sections that haven't passed + * validate, so we can't trust sb->nr_devices. + */ + + for (unsigned i = 0; i < min(sb->nr_devices, nr); i++) member_to_text(out, members_v2_get(mi, i), gi, sb, i); } From b68baf9a87330148d3ad074e48503a4e9c5c3929 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 11 Jun 2025 15:57:48 -0400 Subject: [PATCH 216/497] bcachefs: Print devices we're mounting on multi device filesystems Previously, we only ever logged the filesystem UUID. Signed-off-by: Kent Overstreet --- fs/bcachefs/super.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index ae8fa58a208d..a5b97c9c5163 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1067,12 +1067,13 @@ noinline_for_stack static void print_mount_opts(struct bch_fs *c) { enum bch_opt_id i; - struct printbuf p = PRINTBUF; - bool first = true; + CLASS(printbuf, p)(); + bch2_log_msg_start(c, &p); prt_str(&p, "starting version "); bch2_version_to_text(&p, c->sb.version); + bool first = true; for (i = 0; i < bch2_opts_nr; i++) { const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); @@ -1089,17 +1090,24 @@ static void print_mount_opts(struct bch_fs *c) } if (c->sb.version_incompat_allowed != c->sb.version) { - prt_printf(&p, "\n allowing incompatible features above "); + prt_printf(&p, "\nallowing incompatible features above "); bch2_version_to_text(&p, c->sb.version_incompat_allowed); } if (c->opts.verbose) { - prt_printf(&p, "\n features: "); + prt_printf(&p, "\nfeatures: "); prt_bitflags(&p, bch2_sb_features, c->sb.features); } - bch_info(c, "%s", p.buf); - printbuf_exit(&p); + if (c->sb.multi_device) { + prt_printf(&p, "\nwith devices"); + for_each_online_member(c, ca, BCH_DEV_READ_REF_bch2_online_devs) { + prt_char(&p, ' '); + prt_str(&p, ca->name); + } + } + + bch2_print_str(c, KERN_INFO, p.buf); } static bool bch2_fs_may_start(struct bch_fs *c) From cd1124244be30fd3e87da9186508aab371e9307d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 8 Jun 2025 12:35:20 -0400 Subject: [PATCH 217/497] bcachefs: Ensure that snapshot creation propagates has_case_insensitive We normally can't create a new directory with the case-insensitive option already set - except when we're creating a snapshot. And if casefolding is enabled filesystem wide, we should still set it even though not strictly required, for consistency. Signed-off-by: Kent Overstreet --- fs/bcachefs/namei.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 24120037c031..779c22eb3979 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -175,6 +175,16 @@ int bch2_create_trans(struct btree_trans *trans, new_inode->bi_dir_offset = dir_offset; } + if (S_ISDIR(mode)) { + ret = bch2_maybe_propagate_has_case_insensitive(trans, + (subvol_inum) { + new_inode->bi_subvol ?: dir.subvol, + new_inode->bi_inum }, + new_inode); + if (ret) + goto err; + } + if (S_ISDIR(mode) && !new_inode->bi_subvol) new_inode->bi_depth = dir_u->bi_depth + 1; From aef22f6fe7a630d536f9eaa0a7a2ed0f90ea369e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 10 Jun 2025 22:32:14 -0400 Subject: [PATCH 218/497] bcachefs: Don't trace should_be_locked unless changing Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_locking.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index 9adca77e2580..f2173a3316f4 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -417,8 +417,10 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st EBUG_ON(!btree_node_locked(path, path->level)); EBUG_ON(path->uptodate); - path->should_be_locked = true; - trace_btree_path_should_be_locked(trans, path); + if (!path->should_be_locked) { + path->should_be_locked = true; + trace_btree_path_should_be_locked(trans, path); + } } static inline void __btree_path_set_level_up(struct btree_trans *trans, From aa2024c01a9afba6728b362626f868811ca872ee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 10 Jun 2025 20:10:18 -0400 Subject: [PATCH 219/497] KVM: x86/mmu: Embed direct bits into gpa for KVM_PRE_FAULT_MEMORY Bug[*] reported for TDX case when enabling KVM_PRE_FAULT_MEMORY in QEMU. It turns out that @gpa passed to kvm_mmu_do_page_fault() doesn't have shared bit set when the memory attribute of it is shared, and it leads to wrong root in tdp_mmu_get_root_for_fault(). Fix it by embedding the direct bits in the gpa that is passed to kvm_tdp_map_page(), when the memory of the gpa is not private. [*] https://lore.kernel.org/qemu-devel/4a757796-11c2-47f1-ae0d-335626e818fd@intel.com/ Reported-by: Xiaoyao Li Closes: https://lore.kernel.org/qemu-devel/4a757796-11c2-47f1-ae0d-335626e818fd@intel.com/ Signed-off-by: Paolo Bonzini Signed-off-by: Xiaoyao Li Message-ID: <20250611001018.2179964-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index cbc84c6abc2e..a4040578b537 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4896,6 +4896,7 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, { u64 error_code = PFERR_GUEST_FINAL_MASK; u8 level = PG_LEVEL_4K; + u64 direct_bits; u64 end; int r; @@ -4910,15 +4911,18 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, if (r) return r; + direct_bits = 0; if (kvm_arch_has_private_mem(vcpu->kvm) && kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa))) error_code |= PFERR_PRIVATE_ACCESS; + else + direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(vcpu->kvm)); /* * Shadow paging uses GVA for kvm page fault, so restrict to * two-dimensional paging. */ - r = kvm_tdp_map_page(vcpu, range->gpa, error_code, &level); + r = kvm_tdp_map_page(vcpu, range->gpa | direct_bits, error_code, &level); if (r < 0) return r; From 8046d29dde17002523f94d3e6e0ebe486ce52166 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Jun 2025 00:47:44 -0400 Subject: [PATCH 220/497] KVM: x86/mmu: Reject direct bits in gpa passed to KVM_PRE_FAULT_MEMORY Only let userspace pass the same addresses that were used in KVM_SET_USER_MEMORY_REGION (or KVM_SET_USER_MEMORY_REGION2); gpas in the the upper half of the address space are an implementation detail of TDX and KVM. Extracted from a patch by Sean Christopherson . Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a4040578b537..4e06e2e89a8f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4903,6 +4903,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, if (!vcpu->kvm->arch.pre_fault_allowed) return -EOPNOTSUPP; + if (kvm_is_gfn_alias(vcpu->kvm, gpa_to_gfn(range->gpa))) + return -EINVAL; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. From 33db8c97b4cfa0328054fb755dfbcd6e7f3c7a9d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 9 Jun 2025 14:26:54 +0530 Subject: [PATCH 221/497] rust: Use CpuId in place of raw CPU numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the newly defined `CpuId` abstraction instead of raw CPU numbers. This also fixes a doctest failure for configurations where `nr_cpu_ids < 4`. The C `cpumask_{set|clear}_cpu()` APIs emit a warning when given an invalid CPU number — but only if `CONFIG_DEBUG_PER_CPU_MAPS=y` is set. Meanwhile, `cpumask_weight()` only considers CPUs up to `nr_cpu_ids`, which can cause inconsistencies: a CPU number greater than `nr_cpu_ids` may be set in the mask, yet the weight calculation won't reflect it. This leads to doctest failures when `nr_cpu_ids < 4`, as the test tries to set CPUs 2 and 3: rust_doctest_kernel_cpumask_rs_0.location: rust/kernel/cpumask.rs:180 rust_doctest_kernel_cpumask_rs_0: ASSERTION FAILED at rust/kernel/cpumask.rs:190 Fixes: 8961b8cb3099 ("rust: cpumask: Add initial abstractions") Reported-by: Miguel Ojeda Closes: https://lore.kernel.org/rust-for-linux/CANiq72k3ozKkLMinTLQwvkyg9K=BeRxs1oYZSKhJHY-veEyZdg@mail.gmail.com/ Reported-by: Andreas Hindborg Closes: https://lore.kernel.org/all/87qzzy3ric.fsf@kernel.org/ Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Reviewed-by: Boqun Feng --- drivers/cpufreq/rcpufreq_dt.rs | 4 +-- rust/kernel/cpu.rs | 4 +-- rust/kernel/cpufreq.rs | 27 ++++++++++++------ rust/kernel/cpumask.rs | 51 ++++++++++++++++++++++++---------- 4 files changed, 59 insertions(+), 27 deletions(-) diff --git a/drivers/cpufreq/rcpufreq_dt.rs b/drivers/cpufreq/rcpufreq_dt.rs index 94ed81644fe1..43c87d0259b6 100644 --- a/drivers/cpufreq/rcpufreq_dt.rs +++ b/drivers/cpufreq/rcpufreq_dt.rs @@ -26,9 +26,9 @@ fn find_supply_name_exact(dev: &Device, name: &str) -> Option { } /// Finds supply name for the CPU from DT. -fn find_supply_names(dev: &Device, cpu: u32) -> Option> { +fn find_supply_names(dev: &Device, cpu: cpu::CpuId) -> Option> { // Try "cpu0" for older DTs, fallback to "cpu". - let name = (cpu == 0) + let name = (cpu.as_u32() == 0) .then(|| find_supply_name_exact(dev, "cpu0")) .flatten() .or_else(|| find_supply_name_exact(dev, "cpu"))?; diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs index 6a3aecb12468..abc780d7a8ec 100644 --- a/rust/kernel/cpu.rs +++ b/rust/kernel/cpu.rs @@ -127,9 +127,9 @@ fn from(id: CpuId) -> Self { /// Callers must ensure that the CPU device is not used after it has been unregistered. /// This can be achieved, for example, by registering a CPU hotplug notifier and removing /// any references to the CPU device within the notifier's callback. -pub unsafe fn from_cpu(cpu: u32) -> Result<&'static Device> { +pub unsafe fn from_cpu(cpu: CpuId) -> Result<&'static Device> { // SAFETY: It is safe to call `get_cpu_device()` for any CPU. - let ptr = unsafe { bindings::get_cpu_device(cpu) }; + let ptr = unsafe { bindings::get_cpu_device(u32::from(cpu)) }; if ptr.is_null() { return Err(ENODEV); } diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 9b995f18aac6..11b03e9d7e89 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -10,6 +10,7 @@ use crate::{ clk::Hertz, + cpu::CpuId, cpumask, device::{Bound, Device}, devres::Devres, @@ -465,8 +466,9 @@ fn as_mut_ref(&mut self) -> &mut bindings::cpufreq_policy { /// Returns the primary CPU for the [`Policy`]. #[inline] - pub fn cpu(&self) -> u32 { - self.as_ref().cpu + pub fn cpu(&self) -> CpuId { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + unsafe { CpuId::from_u32_unchecked(self.as_ref().cpu) } } /// Returns the minimum frequency for the [`Policy`]. @@ -525,7 +527,7 @@ pub fn generic_suspend(&mut self) -> Result { #[inline] pub fn generic_get(&self) -> Result { // SAFETY: By the type invariant, the pointer stored in `self` is valid. - Ok(unsafe { bindings::cpufreq_generic_get(self.cpu()) }) + Ok(unsafe { bindings::cpufreq_generic_get(u32::from(self.cpu())) }) } /// Provides a wrapper to the register with energy model using the OPP core. @@ -678,9 +680,9 @@ fn clear_data(&mut self) -> Option { struct PolicyCpu<'a>(&'a mut Policy); impl<'a> PolicyCpu<'a> { - fn from_cpu(cpu: u32) -> Result { + fn from_cpu(cpu: CpuId) -> Result { // SAFETY: It is safe to call `cpufreq_cpu_get` for any valid CPU. - let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(cpu) })?; + let ptr = from_err_ptr(unsafe { bindings::cpufreq_cpu_get(u32::from(cpu)) })?; Ok(Self( // SAFETY: The `ptr` is guaranteed to be valid and remains valid for the lifetime of @@ -1266,7 +1268,10 @@ impl Registration { target_perf: usize, capacity: usize, ) { - if let Ok(mut policy) = PolicyCpu::from_cpu(cpu) { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) }; + + if let Ok(mut policy) = PolicyCpu::from_cpu(cpu_id) { T::adjust_perf(&mut policy, min_perf, target_perf, capacity); } } @@ -1321,7 +1326,10 @@ impl Registration { /// /// - This function may only be called from the cpufreq C infrastructure. unsafe extern "C" fn get_callback(cpu: u32) -> kernel::ffi::c_uint { - PolicyCpu::from_cpu(cpu).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_u32_unchecked(cpu) }; + + PolicyCpu::from_cpu(cpu_id).map_or(0, |mut policy| T::get(&mut policy).map_or(0, |f| f)) } /// Driver's `update_limit` callback. @@ -1344,8 +1352,11 @@ impl Registration { /// - This function may only be called from the cpufreq C infrastructure. /// - The pointer arguments must be valid pointers. unsafe extern "C" fn bios_limit_callback(cpu: i32, limit: *mut u32) -> kernel::ffi::c_int { + // SAFETY: The C API guarantees that `cpu` refers to a valid CPU number. + let cpu_id = unsafe { CpuId::from_i32_unchecked(cpu) }; + from_result(|| { - let mut policy = PolicyCpu::from_cpu(cpu as u32)?; + let mut policy = PolicyCpu::from_cpu(cpu_id)?; // SAFETY: `limit` is guaranteed by the C code to be valid. T::bios_limit(&mut policy, &mut (unsafe { *limit })).map(|()| 0) diff --git a/rust/kernel/cpumask.rs b/rust/kernel/cpumask.rs index c90bfac9346a..19c607709b5f 100644 --- a/rust/kernel/cpumask.rs +++ b/rust/kernel/cpumask.rs @@ -6,6 +6,7 @@ use crate::{ alloc::{AllocError, Flags}, + cpu::CpuId, prelude::*, types::Opaque, }; @@ -35,9 +36,10 @@ /// /// ``` /// use kernel::bindings; +/// use kernel::cpu::CpuId; /// use kernel::cpumask::Cpumask; /// -/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: u32, clear_cpu: i32) { +/// fn set_clear_cpu(ptr: *mut bindings::cpumask, set_cpu: CpuId, clear_cpu: CpuId) { /// // SAFETY: The `ptr` is valid for writing and remains valid for the lifetime of the /// // returned reference. /// let mask = unsafe { Cpumask::as_mut_ref(ptr) }; @@ -90,9 +92,9 @@ pub fn as_raw(&self) -> *mut bindings::cpumask { /// This mismatches kernel naming convention and corresponds to the C /// function `__cpumask_set_cpu()`. #[inline] - pub fn set(&mut self, cpu: u32) { + pub fn set(&mut self, cpu: CpuId) { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `__cpumask_set_cpu`. - unsafe { bindings::__cpumask_set_cpu(cpu, self.as_raw()) }; + unsafe { bindings::__cpumask_set_cpu(u32::from(cpu), self.as_raw()) }; } /// Clear `cpu` in the cpumask. @@ -101,19 +103,19 @@ pub fn set(&mut self, cpu: u32) { /// This mismatches kernel naming convention and corresponds to the C /// function `__cpumask_clear_cpu()`. #[inline] - pub fn clear(&mut self, cpu: i32) { + pub fn clear(&mut self, cpu: CpuId) { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to // `__cpumask_clear_cpu`. - unsafe { bindings::__cpumask_clear_cpu(cpu, self.as_raw()) }; + unsafe { bindings::__cpumask_clear_cpu(i32::from(cpu), self.as_raw()) }; } /// Test `cpu` in the cpumask. /// /// Equivalent to the kernel's `cpumask_test_cpu` API. #[inline] - pub fn test(&self, cpu: i32) -> bool { + pub fn test(&self, cpu: CpuId) -> bool { // SAFETY: By the type invariant, `self.as_raw` is a valid argument to `cpumask_test_cpu`. - unsafe { bindings::cpumask_test_cpu(cpu, self.as_raw()) } + unsafe { bindings::cpumask_test_cpu(i32::from(cpu), self.as_raw()) } } /// Set all CPUs in the cpumask. @@ -178,21 +180,40 @@ pub fn copy(&self, dstp: &mut Self) { /// The following example demonstrates how to create and update a [`CpumaskVar`]. /// /// ``` +/// use kernel::cpu::CpuId; /// use kernel::cpumask::CpumaskVar; /// /// let mut mask = CpumaskVar::new_zero(GFP_KERNEL).unwrap(); /// /// assert!(mask.empty()); -/// mask.set(2); -/// assert!(mask.test(2)); -/// mask.set(3); -/// assert!(mask.test(3)); -/// assert_eq!(mask.weight(), 2); +/// let mut count = 0; +/// +/// let cpu2 = CpuId::from_u32(2); +/// if let Some(cpu) = cpu2 { +/// mask.set(cpu); +/// assert!(mask.test(cpu)); +/// count += 1; +/// } +/// +/// let cpu3 = CpuId::from_u32(3); +/// if let Some(cpu) = cpu3 { +/// mask.set(cpu); +/// assert!(mask.test(cpu)); +/// count += 1; +/// } +/// +/// assert_eq!(mask.weight(), count); /// /// let mask2 = CpumaskVar::try_clone(&mask).unwrap(); -/// assert!(mask2.test(2)); -/// assert!(mask2.test(3)); -/// assert_eq!(mask2.weight(), 2); +/// +/// if let Some(cpu) = cpu2 { +/// assert!(mask2.test(cpu)); +/// } +/// +/// if let Some(cpu) = cpu3 { +/// assert!(mask2.test(cpu)); +/// } +/// assert_eq!(mask2.weight(), count); /// ``` pub struct CpumaskVar { #[cfg(CONFIG_CPUMASK_OFFSTACK)] From c7f005f70d22cd5613cac30bf6d34867189e36a9 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 9 Jun 2025 16:44:16 +0530 Subject: [PATCH 222/497] rust: cpu: Add CpuId::current() to retrieve current CPU ID Introduce `CpuId::current()`, a constructor that wraps the C function `raw_smp_processor_id()` to retrieve the current CPU identifier without guaranteeing stability. This function should be used only when the caller can ensure that the CPU ID won't change unexpectedly due to preemption or migration. Suggested-by: Boqun Feng Signed-off-by: Viresh Kumar Reviewed-by: Boqun Feng --- MAINTAINERS | 1 + rust/helpers/cpu.c | 8 ++++++++ rust/helpers/helpers.c | 1 + rust/kernel/cpu.rs | 11 +++++++++++ 4 files changed, 21 insertions(+) create mode 100644 rust/helpers/cpu.c diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..4255186784c4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6254,6 +6254,7 @@ F: include/linux/cpuhotplug.h F: include/linux/smpboot.h F: kernel/cpu.c F: kernel/smpboot.* +F: rust/helper/cpu.c F: rust/kernel/cpu.rs CPU IDLE TIME MANAGEMENT FRAMEWORK diff --git a/rust/helpers/cpu.c b/rust/helpers/cpu.c new file mode 100644 index 000000000000..824e0adb19d4 --- /dev/null +++ b/rust/helpers/cpu.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +unsigned int rust_helper_raw_smp_processor_id(void) +{ + return raw_smp_processor_id(); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 0f1b5d115985..16fa9bca5949 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -13,6 +13,7 @@ #include "build_assert.c" #include "build_bug.c" #include "clk.c" +#include "cpu.c" #include "cpufreq.c" #include "cpumask.c" #include "cred.c" diff --git a/rust/kernel/cpu.rs b/rust/kernel/cpu.rs index abc780d7a8ec..b75403b0eb56 100644 --- a/rust/kernel/cpu.rs +++ b/rust/kernel/cpu.rs @@ -102,6 +102,17 @@ pub fn from_u32(id: u32) -> Option { pub fn as_u32(&self) -> u32 { self.0 } + + /// Returns the ID of the CPU the code is currently running on. + /// + /// The returned value is considered unstable because it may change + /// unexpectedly due to preemption or CPU migration. It should only be + /// used when the context ensures that the task remains on the same CPU + /// or the users could use a stale (yet valid) CPU ID. + pub fn current() -> Self { + // SAFETY: raw_smp_processor_id() always returns a valid CPU ID. + unsafe { Self::from_u32_unchecked(bindings::raw_smp_processor_id()) } + } } impl From for u32 { From b1a529bdb9641392b4f5fc91545598793c0b3b96 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 5 Jun 2025 22:34:31 +0100 Subject: [PATCH 223/497] selftests/mm: skip failed memfd setups in gup_longterm Unlike the other cases gup_longterm's memfd tests previously skipped the test when failing to set up the file descriptor to test. Restore this behavior to avoid hitting failures when hugetlb isn't configured. Link: https://lkml.kernel.org/r/20250605-selftest-mm-gup-longterm-tweaks-v1-1-2fae34b05958@kernel.org Fixes: 66bce7afbaca ("selftests/mm: fix test result reporting in gup_longterm") Signed-off-by: Mark Brown Reported-by: Lorenzo Stoakes Closes: https://lkml.kernel.org/r/a76fc252-0fe3-4d4b-a9a1-4a2895c2680d@lucifer.local Reviewed-by: Lorenzo Stoakes Tested-by: Lorenzo Stoakes Acked-by: David Hildenbrand Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/gup_longterm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 8a97ac5176a4..29047d2e0c49 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -298,8 +298,11 @@ static void run_with_memfd(test_fn fn, const char *desc) log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) + if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; + } fn(fd, pagesize); close(fd); @@ -366,6 +369,8 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, fd = memfd_create("test", flags); if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; } fn(fd, hugetlbsize); From 331843c845d15413e9d89fd91cdcfa6912e291c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jun 2025 17:23:37 -0700 Subject: [PATCH 224/497] scatterlist: fix extraneous '@'-sign kernel-doc notation Using "@argname@" in kernel-doc produces "argname****" (with "argname" in bold) in the generated html output, so use the expected kernel-doc notation of just "@argname" instead. "Fixes:" lines are added in case Matthew's patch [1] is backported. Link: https://lkml.kernel.org/r/20250605002337.2842659-1-rdunlap@infradead.org Link: https://lore.kernel.org/linux-doc/3bc4e779-7a79-42c1-8867-024f643a22fc@infradead.org/T/#m5d2bd9d21fb34f297aa4e7db069f09bc27b89007 [1] Fixes: 0db9299f48eb ("SG: Move functions to lib/scatterlist.c and add sg chaining allocator helpers") Fixes: 8d1d4b538bb1 ("scatterlist: inline sg_next()") Fixes: 18dabf473e15 ("Change table chaining layout") Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/scatterlist.h | 4 ++-- lib/scatterlist.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 0cdbfc42f153..6f8a4965f9b9 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -99,7 +99,7 @@ static inline bool sg_is_last(struct scatterlist *sg) * @sg: The current sg entry * * Description: - * Usually the next entry will be @sg@ + 1, but if this sg element is part + * Usually the next entry will be @sg + 1, but if this sg element is part * of a chained scatterlist, it could jump to the start of a new * scatterlist array. * @@ -254,7 +254,7 @@ static inline void __sg_chain(struct scatterlist *chain_sg, * @sgl: Second scatterlist * * Description: - * Links @prv@ and @sgl@ together, to form a longer scatterlist. + * Links @prv and @sgl together, to form a longer scatterlist. * **/ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7582dfab7fe3..4af1c8b0775a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -73,9 +73,9 @@ EXPORT_SYMBOL(sg_nents_for_len); * Should only be used casually, it (currently) scans the entire list * to get the last entry. * - * Note that the @sgl@ pointer passed in need not be the first one, - * the important bit is that @nents@ denotes the number of entries that - * exist from @sgl@. + * Note that the @sgl pointer passed in need not be the first one, + * the important bit is that @nents denotes the number of entries that + * exist from @sgl. * **/ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) @@ -345,7 +345,7 @@ EXPORT_SYMBOL(__sg_alloc_table); * @gfp_mask: GFP allocation mask * * Description: - * Allocate and initialize an sg table. If @nents@ is larger than + * Allocate and initialize an sg table. If @nents is larger than * SG_MAX_SINGLE_ALLOC a chained sg table will be setup. * **/ From 1b8e4091ffb4655c761354eb33f41b618e809427 Mon Sep 17 00:00:00 2001 From: wangfushuai Date: Sat, 7 Jun 2025 23:36:14 +0800 Subject: [PATCH 225/497] docs: proc: update VmFlags documentation in smaps Remove outdated VM_DENYWRITE("dw") reference and add missing VM_LOCKONFAULT("lf") and VM_UFFD_MINOR("ui") flags. [akpm@linux-foundation.org: add "dp" (VM_DROPPABLE), per Tal] Link: https://lkml.kernel.org/r/20250607153614.81914-1-wangfushuai@baidu.com Signed-off-by: wangfushuai Cc: Andrii Nakryiko Cc: Catalin Marinas Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Mariano Pache Cc: xu xin Cc: Tal Zussman Signed-off-by: Andrew Morton --- Documentation/filesystems/proc.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2a17865dfe39..5236cb52e357 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -584,7 +584,6 @@ encoded manner. The codes are the following: ms may share gd stack segment growns down pf pure PFN range - dw disabled write to the mapped file lo pages are locked in memory io memory mapped I/O area sr sequential read advise provided @@ -607,8 +606,11 @@ encoded manner. The codes are the following: mt arm64 MTE allocation tags are enabled um userfaultfd missing tracking uw userfaultfd wr-protect tracking + ui userfaultfd minor fault ss shadow/guarded control stack page sl sealed + lf lock on fault pages + dp always lazily freeable mapping == ======================================= Note that there is no guarantee that every flag and associated mnemonic will From 0cf4b1687a187ba9247c71721d8b064634eda1f7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Fri, 6 Jun 2025 13:50:32 +0100 Subject: [PATCH 226/497] mm/vma: reset VMA iterator on commit_merge() OOM failure While an OOM failure in commit_merge() isn't really feasible due to the allocation which might fail (a maple tree pre-allocation) being 'too small to fail', we do need to handle this case correctly regardless. In vma_merge_existing_range(), we can theoretically encounter failures which result in an OOM error in two ways - firstly dup_anon_vma() might fail with an OOM error, and secondly commit_merge() failing, ultimately, to pre-allocate a maple tree node. The abort logic for dup_anon_vma() resets the VMA iterator to the initial range, ensuring that any logic looping on this iterator will correctly proceed to the next VMA. However the commit_merge() abort logic does not do the same thing. This resulted in a syzbot report occurring because mlockall() iterates through VMAs, is tolerant of errors, but ended up with an incorrect previous VMA being specified due to incorrect iterator state. While making this change, it became apparent we are duplicating logic - the logic introduced in commit 41e6ddcaa0f1 ("mm/vma: add give_up_on_oom option on modify/merge, use in uffd release") duplicates the vmg->give_up_on_oom check in both abort branches. Additionally, we observe that we can perform the anon_dup check safely on dup_anon_vma() failure, as this will not be modified should this call fail. Finally, we need to reset the iterator in both cases, so now we can simply use the exact same code to abort for both. We remove the VM_WARN_ON(err != -ENOMEM) as it would be silly for this to be otherwise and it allows us to implement the abort check more neatly. Link: https://lkml.kernel.org/r/20250606125032.164249-1-lorenzo.stoakes@oracle.com Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure") Signed-off-by: Lorenzo Stoakes Reported-by: syzbot+d16409ea9ecc16ed261a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-mm/6842cc67.a00a0220.29ac89.003b.GAE@google.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Reviewed-by: Liam R. Howlett Cc: Jann Horn Cc: Signed-off-by: Andrew Morton --- mm/vma.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/mm/vma.c b/mm/vma.c index 726b2a31ce59..0fb9b2c7b734 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -967,26 +967,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( err = dup_anon_vma(next, middle, &anon_dup); } - if (err) + if (err || commit_merge(vmg)) goto abort; - err = commit_merge(vmg); - if (err) { - VM_WARN_ON(err != -ENOMEM); - - if (anon_dup) - unlink_anon_vmas(anon_dup); - - /* - * We've cleaned up any cloned anon_vma's, no VMAs have been - * modified, no harm no foul if the user requests that we not - * report this and just give up, leaving the VMAs unmerged. - */ - if (!vmg->give_up_on_oom) - vmg->state = VMA_MERGE_ERROR_NOMEM; - return NULL; - } - khugepaged_enter_vma(vmg->target, vmg->flags); vmg->state = VMA_MERGE_SUCCESS; return vmg->target; @@ -995,6 +978,9 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( vma_iter_set(vmg->vmi, start); vma_iter_load(vmg->vmi); + if (anon_dup) + unlink_anon_vmas(anon_dup); + /* * This means we have failed to clone anon_vma's correctly, but no * actual changes to VMAs have occurred, so no harm no foul - if the From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Fri, 6 Jun 2025 10:28:07 +0100 Subject: [PATCH 227/497] mm: close theoretical race where stale TLB entries could linger Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") described a theoretical race as such: """ Nadav Amit identified a theoretical race between page reclaim and mprotect due to TLB flushes being batched outside of the PTL being held. He described the race as follows: CPU0 CPU1 ---- ---- user accesses memory using RW PTE [PTE now cached in TLB] try_to_unmap_one() ==> ptep_get_and_clear() ==> set_tlb_ubc_flush_pending() mprotect(addr, PROT_READ) ==> change_pte_range() ==> [ PTE non-present - no flush ] user writes using cached RW PTE ... try_to_unmap_flush() The same type of race exists for reads when protecting for PROT_NONE and also exists for operations that can leave an old TLB entry behind such as munmap, mremap and madvise. """ The solution was to introduce flush_tlb_batched_pending() and call it under the PTL from mprotect/madvise/munmap/mremap to complete any pending tlb flushes. However, while madvise_free_pte_range() and madvise_cold_or_pageout_pte_range() were both retro-fitted to call flush_tlb_batched_pending() immediately after initially acquiring the PTL, they both temporarily release the PTL to split a large folio if they stumble upon one. In this case, where re-acquiring the PTL flush_tlb_batched_pending() must be called again, but it previously was not. Let's fix that. There are 2 Fixes: tags here: the first is the commit that fixed madvise_free_pte_range(). The second is the commit that added madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the faulty pattern from madvise_free_pte_range(). This is a theoretical bug discovered during code review. Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") Signed-off-by: Ryan Roberts Reviewed-by: Jann Horn Acked-by: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Mel Gorman Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/madvise.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/madvise.c b/mm/madvise.c index 5f7a66a1617e..1d44a35ae85c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -508,6 +508,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, pte_offset_map_lock(mm, pmd, addr, &ptl); if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; @@ -741,6 +742,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, start_pte = pte; if (!start_pte) break; + flush_tlb_batched_pending(mm); arch_enter_lazy_mmu_mode(); if (!err) nr = 0; From 50695153d7ddde3b1696dbf0085be0033bf3ddb3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 7 Jun 2025 17:43:18 -0700 Subject: [PATCH 228/497] drivers/rapidio/rio_cm.c: prevent possible heap overwrite In riocm_cdev_ioctl(RIO_CM_CHAN_SEND) -> cm_chan_msg_send() -> riocm_ch_send() cm_chan_msg_send() checks that userspace didn't send too much data but riocm_ch_send() failed to check that userspace sent sufficient data. The result is that riocm_ch_send() can write to fields in the rio_ch_chan_hdr which were outside the bounds of the space which cm_chan_msg_send() allocated. Address this by teaching riocm_ch_send() to check that the entire rio_ch_chan_hdr was copied in from userspace. Reported-by: maher azz Cc: Matt Porter Cc: Alexandre Bounine Cc: Linus Torvalds Cc: Signed-off-by: Andrew Morton --- drivers/rapidio/rio_cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 97287e838ce1..66464674223f 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -783,6 +783,9 @@ static int riocm_ch_send(u16 ch_id, void *buf, int len) if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE) return -EINVAL; + if (len < sizeof(struct rio_ch_chan_hdr)) + return -EINVAL; /* insufficient data from user */ + ch = riocm_get_channel(ch_id); if (!ch) { riocm_error("%s(%d) ch_%d not found", current->comm, From 02fb36505c61c35d5fd879f5a66a97935dbcb2ee Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 9 Jun 2025 12:24:42 +1200 Subject: [PATCH 229/497] MAINTAINERS: add Barry as a THP reviewer I have been actively contributing to mTHP and reviewing related patches for an extended period, and I would like to continue supporting patch reviews. Link: https://lkml.kernel.org/r/20250609002442.1856-1-21cnbao@gmail.com Signed-off-by: Barry Song Acked-by: Zi Yan Acked-by: Baolin Wang Acked-by: Dev Jain Acked-by: Lorenzo Stoakes Reviewed-by: Ryan Roberts Cc: David Hildenbrand Cc: Liam R. Howlett Cc: Nico Pache Signed-off-by: Andrew Morton --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a92290fffa16..8315c84df075 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15919,6 +15919,7 @@ R: Liam R. Howlett R: Nico Pache R: Ryan Roberts R: Dev Jain +R: Barry Song L: linux-mm@kvack.org S: Maintained W: http://www.linux-mm.org From 66ac1a4d366d3faa95fcf6082b555f8d77f1e8db Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 8 Jun 2025 22:12:35 +0800 Subject: [PATCH 230/497] init: fix build warnings about export.h After commit a934a57a42f64a4 ("scripts/misc-check: check missing #include when W=1") and 7d95680d64ac8e836c ("scripts/misc-check: check unnecessary #include when W=1"), we get some build warnings with W=1: init/main.c: warning: EXPORT_SYMBOL() is used, but #include is missing init/initramfs.c: warning: EXPORT_SYMBOL() is used, but #include is missing So fix these build warnings for the init code. Link: https://lkml.kernel.org/r/20250608141235.155206-1-chenhuacai@loongson.cn Fixes: a934a57a42f6 ("scripts/misc-check: check missing #include when W=1") Signed-off-by: Huacai Chen Reviewed-by: Masahiro Yamada Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Signed-off-by: Andrew Morton --- init/initramfs.c | 1 + init/main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/init/initramfs.c b/init/initramfs.c index 72bad44a1d41..097673b97784 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include diff --git a/init/main.c b/init/main.c index ed576c7f475d..225a58279acd 100644 --- a/init/main.c +++ b/init/main.c @@ -13,6 +13,7 @@ #define DEBUG /* Enable initcall_debug */ #include +#include #include #include #include From 614b1c3cbfb0ecbafd40284d2f8e67c865818714 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 12 Jun 2025 09:27:22 +0200 Subject: [PATCH 231/497] i2c: use inclusive callbacks in struct i2c_algorithm Convert the I2C subsystem to drop using the 'master_'-prefixed callbacks in favor of the simplified ones. Fix alignment of '=' while here. Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-bit.c | 4 ++-- drivers/i2c/algos/i2c-algo-pca.c | 4 ++-- drivers/i2c/algos/i2c-algo-pcf.c | 4 ++-- drivers/i2c/busses/i2c-amd-mp2-plat.c | 2 +- drivers/i2c/busses/i2c-aspeed.c | 8 ++++---- drivers/i2c/busses/i2c-at91-master.c | 4 ++-- drivers/i2c/busses/i2c-axxia.c | 2 +- drivers/i2c/busses/i2c-bcm-iproc.c | 2 +- drivers/i2c/busses/i2c-cadence.c | 10 +++++----- drivers/i2c/busses/i2c-cgbc.c | 4 ++-- drivers/i2c/busses/i2c-eg20t.c | 2 +- drivers/i2c/busses/i2c-emev2.c | 6 +++--- drivers/i2c/busses/i2c-exynos5.c | 6 +++--- drivers/i2c/busses/i2c-gxp.c | 6 +++--- drivers/i2c/busses/i2c-img-scb.c | 2 +- drivers/i2c/busses/i2c-imx-lpi2c.c | 8 ++++---- drivers/i2c/busses/i2c-imx.c | 8 ++++---- drivers/i2c/busses/i2c-keba.c | 2 +- drivers/i2c/busses/i2c-mchp-pci1xxxx.c | 2 +- drivers/i2c/busses/i2c-meson.c | 4 ++-- drivers/i2c/busses/i2c-microchip-corei2c.c | 2 +- drivers/i2c/busses/i2c-mt65xx.c | 2 +- drivers/i2c/busses/i2c-mxs.c | 2 +- drivers/i2c/busses/i2c-nomadik.c | 4 ++-- drivers/i2c/busses/i2c-npcm7xx.c | 6 +++--- drivers/i2c/busses/i2c-omap.c | 6 +++--- drivers/i2c/busses/i2c-pnx.c | 2 +- drivers/i2c/busses/i2c-pxa.c | 16 ++++++++-------- drivers/i2c/busses/i2c-qcom-cci.c | 4 ++-- drivers/i2c/busses/i2c-qcom-geni.c | 4 ++-- drivers/i2c/busses/i2c-qup.c | 8 ++++---- drivers/i2c/busses/i2c-rcar.c | 10 +++++----- drivers/i2c/busses/i2c-s3c2410.c | 6 +++--- drivers/i2c/busses/i2c-sh7760.c | 4 ++-- drivers/i2c/busses/i2c-sh_mobile.c | 4 ++-- drivers/i2c/busses/i2c-stm32f7.c | 4 ++-- drivers/i2c/busses/i2c-synquacer.c | 4 ++-- drivers/i2c/busses/i2c-tegra.c | 6 +++--- drivers/i2c/busses/i2c-xiic.c | 4 ++-- drivers/i2c/busses/i2c-xlp9xx.c | 2 +- drivers/i2c/i2c-atr.c | 2 +- drivers/i2c/i2c-mux.c | 6 +++--- drivers/i2c/muxes/i2c-demux-pinctrl.c | 4 ++-- 43 files changed, 101 insertions(+), 101 deletions(-) diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index eddf25b90ca8..6544d27e4419 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -619,8 +619,8 @@ static u32 bit_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ const struct i2c_algorithm i2c_bit_algo = { - .master_xfer = bit_xfer, - .master_xfer_atomic = bit_xfer_atomic, + .xfer = bit_xfer, + .xfer_atomic = bit_xfer_atomic, .functionality = bit_func, }; EXPORT_SYMBOL(i2c_bit_algo); diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 384af88e58ad..74b66aec33d4 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -361,8 +361,8 @@ static u32 pca_func(struct i2c_adapter *adap) } static const struct i2c_algorithm pca_algo = { - .master_xfer = pca_xfer, - .functionality = pca_func, + .xfer = pca_xfer, + .functionality = pca_func, }; static unsigned int pca_probe_chip(struct i2c_adapter *adap) diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index 740066ceaea3..fd563e845d4b 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -389,8 +389,8 @@ static u32 pcf_func(struct i2c_adapter *adap) /* exported algorithm data: */ static const struct i2c_algorithm pcf_algo = { - .master_xfer = pcf_xfer, - .functionality = pcf_func, + .xfer = pcf_xfer, + .functionality = pcf_func, }; /* diff --git a/drivers/i2c/busses/i2c-amd-mp2-plat.c b/drivers/i2c/busses/i2c-amd-mp2-plat.c index d9dd0e475d1a..188e24cc4d35 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-plat.c +++ b/drivers/i2c/busses/i2c-amd-mp2-plat.c @@ -179,7 +179,7 @@ static u32 i2c_amd_func(struct i2c_adapter *a) } static const struct i2c_algorithm i2c_amd_algorithm = { - .master_xfer = i2c_amd_xfer, + .xfer = i2c_amd_xfer, .functionality = i2c_amd_func, }; diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index 1550d3d552ae..a26b74c71206 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -814,11 +814,11 @@ static int aspeed_i2c_unreg_slave(struct i2c_client *client) #endif /* CONFIG_I2C_SLAVE */ static const struct i2c_algorithm aspeed_i2c_algo = { - .master_xfer = aspeed_i2c_master_xfer, - .functionality = aspeed_i2c_functionality, + .xfer = aspeed_i2c_master_xfer, + .functionality = aspeed_i2c_functionality, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = aspeed_i2c_reg_slave, - .unreg_slave = aspeed_i2c_unreg_slave, + .reg_slave = aspeed_i2c_reg_slave, + .unreg_slave = aspeed_i2c_unreg_slave, #endif /* CONFIG_I2C_SLAVE */ }; diff --git a/drivers/i2c/busses/i2c-at91-master.c b/drivers/i2c/busses/i2c-at91-master.c index 374fc50bb205..59795c1c24ff 100644 --- a/drivers/i2c/busses/i2c-at91-master.c +++ b/drivers/i2c/busses/i2c-at91-master.c @@ -739,8 +739,8 @@ static u32 at91_twi_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm at91_twi_algorithm = { - .master_xfer = at91_twi_xfer, - .functionality = at91_twi_func, + .xfer = at91_twi_xfer, + .functionality = at91_twi_func, }; static int at91_twi_configure_dma(struct at91_twi_dev *dev, u32 phy_addr) diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c index 50030256cd85..0555eeb6903a 100644 --- a/drivers/i2c/busses/i2c-axxia.c +++ b/drivers/i2c/busses/i2c-axxia.c @@ -706,7 +706,7 @@ static int axxia_i2c_unreg_slave(struct i2c_client *slave) } static const struct i2c_algorithm axxia_i2c_algo = { - .master_xfer = axxia_i2c_xfer, + .xfer = axxia_i2c_xfer, .functionality = axxia_i2c_func, .reg_slave = axxia_i2c_reg_slave, .unreg_slave = axxia_i2c_unreg_slave, diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 63bc3c8f49d3..e418a4f23f15 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1041,7 +1041,7 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) } static struct i2c_algorithm bcm_iproc_algo = { - .master_xfer = bcm_iproc_i2c_xfer, + .xfer = bcm_iproc_i2c_xfer, .functionality = bcm_iproc_i2c_functionality, .reg_slave = bcm_iproc_i2c_reg_slave, .unreg_slave = bcm_iproc_i2c_unreg_slave, diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 8df63aaf2a80..697d095afbe4 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -1231,12 +1231,12 @@ static int cdns_unreg_slave(struct i2c_client *slave) #endif static const struct i2c_algorithm cdns_i2c_algo = { - .master_xfer = cdns_i2c_master_xfer, - .master_xfer_atomic = cdns_i2c_master_xfer_atomic, - .functionality = cdns_i2c_func, + .xfer = cdns_i2c_master_xfer, + .xfer_atomic = cdns_i2c_master_xfer_atomic, + .functionality = cdns_i2c_func, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = cdns_reg_slave, - .unreg_slave = cdns_unreg_slave, + .reg_slave = cdns_reg_slave, + .unreg_slave = cdns_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-cgbc.c b/drivers/i2c/busses/i2c-cgbc.c index f054d167ac47..25a74fa51aa0 100644 --- a/drivers/i2c/busses/i2c-cgbc.c +++ b/drivers/i2c/busses/i2c-cgbc.c @@ -331,8 +331,8 @@ static u32 cgbc_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm cgbc_i2c_algorithm = { - .master_xfer = cgbc_i2c_xfer, - .functionality = cgbc_i2c_func, + .xfer = cgbc_i2c_xfer, + .functionality = cgbc_i2c_func, }; static struct i2c_algo_cgbc_data cgbc_i2c_algo_data[] = { diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c index efdaddf99f9e..27ea3c130a16 100644 --- a/drivers/i2c/busses/i2c-eg20t.c +++ b/drivers/i2c/busses/i2c-eg20t.c @@ -690,7 +690,7 @@ static u32 pch_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm pch_algorithm = { - .master_xfer = pch_i2c_xfer, + .xfer = pch_i2c_xfer, .functionality = pch_i2c_func }; diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c index 2512cef8e2a2..ece019b3d066 100644 --- a/drivers/i2c/busses/i2c-emev2.c +++ b/drivers/i2c/busses/i2c-emev2.c @@ -351,10 +351,10 @@ static int em_i2c_unreg_slave(struct i2c_client *slave) } static const struct i2c_algorithm em_i2c_algo = { - .master_xfer = em_i2c_xfer, + .xfer = em_i2c_xfer, .functionality = em_i2c_func, - .reg_slave = em_i2c_reg_slave, - .unreg_slave = em_i2c_unreg_slave, + .reg_slave = em_i2c_reg_slave, + .unreg_slave = em_i2c_unreg_slave, }; static int em_i2c_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index 02f24479aa07..9c1c5f3c09f6 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -879,9 +879,9 @@ static u32 exynos5_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm exynos5_i2c_algorithm = { - .master_xfer = exynos5_i2c_xfer, - .master_xfer_atomic = exynos5_i2c_xfer_atomic, - .functionality = exynos5_i2c_func, + .xfer = exynos5_i2c_xfer, + .xfer_atomic = exynos5_i2c_xfer_atomic, + .functionality = exynos5_i2c_func, }; static int exynos5_i2c_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-gxp.c b/drivers/i2c/busses/i2c-gxp.c index 0fc39caa6c87..2d117e7e3cb6 100644 --- a/drivers/i2c/busses/i2c-gxp.c +++ b/drivers/i2c/busses/i2c-gxp.c @@ -184,11 +184,11 @@ static int gxp_i2c_unreg_slave(struct i2c_client *slave) #endif static const struct i2c_algorithm gxp_i2c_algo = { - .master_xfer = gxp_i2c_master_xfer, + .xfer = gxp_i2c_master_xfer, .functionality = gxp_i2c_func, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = gxp_i2c_reg_slave, - .unreg_slave = gxp_i2c_unreg_slave, + .reg_slave = gxp_i2c_reg_slave, + .unreg_slave = gxp_i2c_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-img-scb.c b/drivers/i2c/busses/i2c-img-scb.c index 3278707bb885..a454f9f25146 100644 --- a/drivers/i2c/busses/i2c-img-scb.c +++ b/drivers/i2c/busses/i2c-img-scb.c @@ -1143,7 +1143,7 @@ static u32 img_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm img_i2c_algo = { - .master_xfer = img_i2c_xfer, + .xfer = img_i2c_xfer, .functionality = img_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c index 342d47e67586..064bc83840a6 100644 --- a/drivers/i2c/busses/i2c-imx-lpi2c.c +++ b/drivers/i2c/busses/i2c-imx-lpi2c.c @@ -1268,10 +1268,10 @@ static u32 lpi2c_imx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm lpi2c_imx_algo = { - .master_xfer = lpi2c_imx_xfer, - .functionality = lpi2c_imx_func, - .reg_target = lpi2c_imx_register_target, - .unreg_target = lpi2c_imx_unregister_target, + .xfer = lpi2c_imx_xfer, + .functionality = lpi2c_imx_func, + .reg_target = lpi2c_imx_register_target, + .unreg_target = lpi2c_imx_unregister_target, }; static const struct of_device_id lpi2c_imx_of_match[] = { diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index de01dfecb16e..e5732b0557fb 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1692,11 +1692,11 @@ static u32 i2c_imx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm i2c_imx_algo = { - .master_xfer = i2c_imx_xfer, - .master_xfer_atomic = i2c_imx_xfer_atomic, + .xfer = i2c_imx_xfer, + .xfer_atomic = i2c_imx_xfer_atomic, .functionality = i2c_imx_func, - .reg_slave = i2c_imx_reg_slave, - .unreg_slave = i2c_imx_unreg_slave, + .reg_slave = i2c_imx_reg_slave, + .unreg_slave = i2c_imx_unreg_slave, }; static int i2c_imx_probe(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-keba.c b/drivers/i2c/busses/i2c-keba.c index 7b9ed2592f5b..9420c8b342b5 100644 --- a/drivers/i2c/busses/i2c-keba.c +++ b/drivers/i2c/busses/i2c-keba.c @@ -500,7 +500,7 @@ static u32 ki2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm ki2c_algo = { - .master_xfer = ki2c_xfer, + .xfer = ki2c_xfer, .functionality = ki2c_func, }; diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c index 5ef136c3ecb1..bc0f1a0c8ee1 100644 --- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c +++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c @@ -1048,7 +1048,7 @@ static u32 pci1xxxx_i2c_get_funcs(struct i2c_adapter *adap) } static const struct i2c_algorithm pci1xxxx_i2c_algo = { - .master_xfer = pci1xxxx_i2c_xfer, + .xfer = pci1xxxx_i2c_xfer, .functionality = pci1xxxx_i2c_get_funcs, }; diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index e1d69537353b..0d9032953e48 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -448,8 +448,8 @@ static u32 meson_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm meson_i2c_algorithm = { - .master_xfer = meson_i2c_xfer, - .master_xfer_atomic = meson_i2c_xfer_atomic, + .xfer = meson_i2c_xfer, + .xfer_atomic = meson_i2c_xfer_atomic, .functionality = meson_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index 492bf4c34722..f173bda1c98c 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -526,7 +526,7 @@ static int mchp_corei2c_smbus_xfer(struct i2c_adapter *adap, u16 addr, unsigned } static const struct i2c_algorithm mchp_corei2c_algo = { - .master_xfer = mchp_corei2c_xfer, + .xfer = mchp_corei2c_xfer, .functionality = mchp_corei2c_func, .smbus_xfer = mchp_corei2c_smbus_xfer, }; diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 5bd342047d59..ab456c3717db 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -1342,7 +1342,7 @@ static u32 mtk_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm mtk_i2c_algorithm = { - .master_xfer = mtk_i2c_transfer, + .xfer = mtk_i2c_transfer, .functionality = mtk_i2c_functionality, }; diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index ad62d56b2186..08c9091a1e35 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -687,7 +687,7 @@ static irqreturn_t mxs_i2c_isr(int this_irq, void *dev_id) } static const struct i2c_algorithm mxs_i2c_algo = { - .master_xfer = mxs_i2c_xfer, + .xfer = mxs_i2c_xfer, .functionality = mxs_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index d2877e4cc28d..19b648fc094d 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -996,8 +996,8 @@ static unsigned int nmk_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm nmk_i2c_algo = { - .master_xfer = nmk_i2c_xfer, - .functionality = nmk_i2c_functionality + .xfer = nmk_i2c_xfer, + .functionality = nmk_i2c_functionality }; static void nmk_i2c_of_probe(struct device_node *np, diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 892e2d2988a7..8b7e15240fb0 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2470,11 +2470,11 @@ static const struct i2c_adapter_quirks npcm_i2c_quirks = { }; static const struct i2c_algorithm npcm_i2c_algo = { - .master_xfer = npcm_i2c_master_xfer, + .xfer = npcm_i2c_master_xfer, .functionality = npcm_i2c_functionality, #if IS_ENABLED(CONFIG_I2C_SLAVE) - .reg_slave = npcm_i2c_reg_slave, - .unreg_slave = npcm_i2c_unreg_slave, + .reg_slave = npcm_i2c_reg_slave, + .unreg_slave = npcm_i2c_unreg_slave, #endif }; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 876791d20ed5..f1cc26ac5b80 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1201,9 +1201,9 @@ omap_i2c_isr_thread(int this_irq, void *dev_id) } static const struct i2c_algorithm omap_i2c_algo = { - .master_xfer = omap_i2c_xfer_irq, - .master_xfer_atomic = omap_i2c_xfer_polling, - .functionality = omap_i2c_func, + .xfer = omap_i2c_xfer_irq, + .xfer_atomic = omap_i2c_xfer_polling, + .functionality = omap_i2c_func, }; static const struct i2c_adapter_quirks omap_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c index 9a1af5bbd604..8daa0008bd05 100644 --- a/drivers/i2c/busses/i2c-pnx.c +++ b/drivers/i2c/busses/i2c-pnx.c @@ -580,7 +580,7 @@ static u32 i2c_pnx_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm pnx_algorithm = { - .master_xfer = i2c_pnx_xfer, + .xfer = i2c_pnx_xfer, .functionality = i2c_pnx_func, }; diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 4415a29f749b..968a8b8794da 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -1154,11 +1154,11 @@ static u32 i2c_pxa_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm i2c_pxa_algorithm = { - .master_xfer = i2c_pxa_xfer, - .functionality = i2c_pxa_functionality, + .xfer = i2c_pxa_xfer, + .functionality = i2c_pxa_functionality, #ifdef CONFIG_I2C_PXA_SLAVE - .reg_slave = i2c_pxa_slave_reg, - .unreg_slave = i2c_pxa_slave_unreg, + .reg_slave = i2c_pxa_slave_reg, + .unreg_slave = i2c_pxa_slave_unreg, #endif }; @@ -1244,11 +1244,11 @@ static int i2c_pxa_pio_xfer(struct i2c_adapter *adap, } static const struct i2c_algorithm i2c_pxa_pio_algorithm = { - .master_xfer = i2c_pxa_pio_xfer, - .functionality = i2c_pxa_functionality, + .xfer = i2c_pxa_pio_xfer, + .functionality = i2c_pxa_functionality, #ifdef CONFIG_I2C_PXA_SLAVE - .reg_slave = i2c_pxa_slave_reg, - .unreg_slave = i2c_pxa_slave_unreg, + .reg_slave = i2c_pxa_slave_reg, + .unreg_slave = i2c_pxa_slave_unreg, #endif }; diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index 05b73326afd4..a3afa11a71a1 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -462,8 +462,8 @@ static u32 cci_func(struct i2c_adapter *adap) } static const struct i2c_algorithm cci_algo = { - .master_xfer = cci_xfer, - .functionality = cci_func, + .xfer = cci_xfer, + .functionality = cci_func, }; static int cci_enable_clocks(struct cci *cci) diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index ccea575fb783..13889f52b6f7 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -727,8 +727,8 @@ static u32 geni_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm geni_i2c_algo = { - .master_xfer = geni_i2c_xfer, - .functionality = geni_i2c_func, + .xfer = geni_i2c_xfer, + .functionality = geni_i2c_func, }; #ifdef CONFIG_ACPI diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c index 3a36d682ed57..6059f585843e 100644 --- a/drivers/i2c/busses/i2c-qup.c +++ b/drivers/i2c/busses/i2c-qup.c @@ -1634,13 +1634,13 @@ static u32 qup_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm qup_i2c_algo = { - .master_xfer = qup_i2c_xfer, - .functionality = qup_i2c_func, + .xfer = qup_i2c_xfer, + .functionality = qup_i2c_func, }; static const struct i2c_algorithm qup_i2c_algo_v2 = { - .master_xfer = qup_i2c_xfer_v2, - .functionality = qup_i2c_func, + .xfer = qup_i2c_xfer_v2, + .functionality = qup_i2c_func, }; /* diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 5693a38da7b5..d51884ab99f4 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -1084,11 +1084,11 @@ static u32 rcar_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm rcar_i2c_algo = { - .master_xfer = rcar_i2c_master_xfer, - .master_xfer_atomic = rcar_i2c_master_xfer_atomic, - .functionality = rcar_i2c_func, - .reg_slave = rcar_reg_slave, - .unreg_slave = rcar_unreg_slave, + .xfer = rcar_i2c_master_xfer, + .xfer_atomic = rcar_i2c_master_xfer_atomic, + .functionality = rcar_i2c_func, + .reg_slave = rcar_reg_slave, + .unreg_slave = rcar_unreg_slave, }; static const struct i2c_adapter_quirks rcar_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 0f3cf500df68..f4fa4703acbd 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -800,9 +800,9 @@ static u32 s3c24xx_i2c_func(struct i2c_adapter *adap) /* i2c bus registration info */ static const struct i2c_algorithm s3c24xx_i2c_algorithm = { - .master_xfer = s3c24xx_i2c_xfer, - .master_xfer_atomic = s3c24xx_i2c_xfer_atomic, - .functionality = s3c24xx_i2c_func, + .xfer = s3c24xx_i2c_xfer, + .xfer_atomic = s3c24xx_i2c_xfer_atomic, + .functionality = s3c24xx_i2c_func, }; /* diff --git a/drivers/i2c/busses/i2c-sh7760.c b/drivers/i2c/busses/i2c-sh7760.c index 620f12596763..43f33988b98f 100644 --- a/drivers/i2c/busses/i2c-sh7760.c +++ b/drivers/i2c/busses/i2c-sh7760.c @@ -379,8 +379,8 @@ static u32 sh7760_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm sh7760_i2c_algo = { - .master_xfer = sh7760_i2c_master_xfer, - .functionality = sh7760_i2c_func, + .xfer = sh7760_i2c_master_xfer, + .functionality = sh7760_i2c_func, }; /* calculate CCR register setting for a desired scl clock. SCL clock is diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c index adfcee6c9fdc..dae8967f8749 100644 --- a/drivers/i2c/busses/i2c-sh_mobile.c +++ b/drivers/i2c/busses/i2c-sh_mobile.c @@ -740,8 +740,8 @@ static u32 sh_mobile_i2c_func(struct i2c_adapter *adapter) static const struct i2c_algorithm sh_mobile_i2c_algorithm = { .functionality = sh_mobile_i2c_func, - .master_xfer = sh_mobile_i2c_xfer, - .master_xfer_atomic = sh_mobile_i2c_xfer_atomic, + .xfer = sh_mobile_i2c_xfer, + .xfer_atomic = sh_mobile_i2c_xfer_atomic, }; static const struct i2c_adapter_quirks sh_mobile_i2c_quirks = { diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 973a3a8c6d4a..e4aaeb2262d0 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -2151,8 +2151,8 @@ static u32 stm32f7_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm stm32f7_i2c_algo = { - .master_xfer = stm32f7_i2c_xfer, - .master_xfer_atomic = stm32f7_i2c_xfer_atomic, + .xfer = stm32f7_i2c_xfer, + .xfer_atomic = stm32f7_i2c_xfer_atomic, .smbus_xfer = stm32f7_i2c_smbus_xfer, .functionality = stm32f7_i2c_func, .reg_slave = stm32f7_i2c_reg_slave, diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c index 31f8d08e32a4..1230f51e1624 100644 --- a/drivers/i2c/busses/i2c-synquacer.c +++ b/drivers/i2c/busses/i2c-synquacer.c @@ -520,8 +520,8 @@ static u32 synquacer_i2c_functionality(struct i2c_adapter *adap) } static const struct i2c_algorithm synquacer_i2c_algo = { - .master_xfer = synquacer_i2c_xfer, - .functionality = synquacer_i2c_functionality, + .xfer = synquacer_i2c_xfer, + .functionality = synquacer_i2c_functionality, }; static const struct i2c_adapter synquacer_i2c_ops = { diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 049b4d154c23..0862b98007f5 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1440,9 +1440,9 @@ static u32 tegra_i2c_func(struct i2c_adapter *adap) } static const struct i2c_algorithm tegra_i2c_algo = { - .master_xfer = tegra_i2c_xfer, - .master_xfer_atomic = tegra_i2c_xfer_atomic, - .functionality = tegra_i2c_func, + .xfer = tegra_i2c_xfer, + .xfer_atomic = tegra_i2c_xfer_atomic, + .functionality = tegra_i2c_func, }; /* payload size is only 12 bit */ diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 6bc1575cea6c..607026c921d6 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -1398,8 +1398,8 @@ static u32 xiic_func(struct i2c_adapter *adap) } static const struct i2c_algorithm xiic_algorithm = { - .master_xfer = xiic_xfer, - .master_xfer_atomic = xiic_xfer_atomic, + .xfer = xiic_xfer, + .xfer_atomic = xiic_xfer_atomic, .functionality = xiic_func, }; diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c index 4d5e49b6321b..ddb1c3e8bc9d 100644 --- a/drivers/i2c/busses/i2c-xlp9xx.c +++ b/drivers/i2c/busses/i2c-xlp9xx.c @@ -452,7 +452,7 @@ static u32 xlp9xx_i2c_functionality(struct i2c_adapter *adapter) } static const struct i2c_algorithm xlp9xx_i2c_algo = { - .master_xfer = xlp9xx_i2c_xfer, + .xfer = xlp9xx_i2c_xfer, .functionality = xlp9xx_i2c_functionality, }; diff --git a/drivers/i2c/i2c-atr.c b/drivers/i2c/i2c-atr.c index be7d6d41e0b2..dd194476b118 100644 --- a/drivers/i2c/i2c-atr.c +++ b/drivers/i2c/i2c-atr.c @@ -738,7 +738,7 @@ struct i2c_atr *i2c_atr_new(struct i2c_adapter *parent, struct device *dev, atr->flags = flags; if (parent->algo->master_xfer) - atr->algo.master_xfer = i2c_atr_master_xfer; + atr->algo.xfer = i2c_atr_master_xfer; if (parent->algo->smbus_xfer) atr->algo.smbus_xfer = i2c_atr_smbus_xfer; atr->algo.functionality = i2c_atr_functionality; diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index fda72e8be885..4d8690981a55 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -293,12 +293,12 @@ int i2c_mux_add_adapter(struct i2c_mux_core *muxc, */ if (parent->algo->master_xfer) { if (muxc->mux_locked) - priv->algo.master_xfer = i2c_mux_master_xfer; + priv->algo.xfer = i2c_mux_master_xfer; else - priv->algo.master_xfer = __i2c_mux_master_xfer; + priv->algo.xfer = __i2c_mux_master_xfer; } if (parent->algo->master_xfer_atomic) - priv->algo.master_xfer_atomic = priv->algo.master_xfer; + priv->algo.xfer_atomic = priv->algo.master_xfer; if (parent->algo->smbus_xfer) { if (muxc->mux_locked) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 77a740561fd7..f2a1f4744978 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -95,9 +95,9 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne priv->cur_chan = new_chan; /* Now fill out current adapter structure. cur_chan must be up to date */ - priv->algo.master_xfer = i2c_demux_master_xfer; + priv->algo.xfer = i2c_demux_master_xfer; if (adap->algo->master_xfer_atomic) - priv->algo.master_xfer_atomic = i2c_demux_master_xfer; + priv->algo.xfer_atomic = i2c_demux_master_xfer; priv->algo.functionality = i2c_demux_functionality; snprintf(priv->cur_adap.name, sizeof(priv->cur_adap.name), From 2d5a84c3ecc075d87bcb16c1cc80277b5837c153 Mon Sep 17 00:00:00 2001 From: Xi Pardee Date: Tue, 10 Jun 2025 16:04:06 -0700 Subject: [PATCH 232/497] platform/x86/intel/pmc: Add Lunar Lake support to Intel PMC SSRAM Telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Lunar Lake support to Intel PMC SSRAM Telemetry driver. Signed-off-by: Xi Pardee Link: https://lore.kernel.org/r/20250610230416.622970-1-xi.pardee@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/pmc/core.h | 3 +++ drivers/platform/x86/intel/pmc/ssram_telemetry.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index e136d18b1d38..c1db41cb8334 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -299,6 +299,9 @@ enum ppfear_regs { #define PTL_PCD_PMC_MMIO_REG_LEN 0x31A8 /* SSRAM PMC Device ID */ +/* LNL */ +#define PMC_DEVID_LNL_SOCM 0xa87f + /* ARL */ #define PMC_DEVID_ARL_SOCM 0x777f #define PMC_DEVID_ARL_SOCS 0xae7f diff --git a/drivers/platform/x86/intel/pmc/ssram_telemetry.c b/drivers/platform/x86/intel/pmc/ssram_telemetry.c index b207247eb5dd..24d5d01805c8 100644 --- a/drivers/platform/x86/intel/pmc/ssram_telemetry.c +++ b/drivers/platform/x86/intel/pmc/ssram_telemetry.c @@ -187,6 +187,7 @@ static const struct pci_device_id intel_pmc_ssram_telemetry_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_MTL_SOCM) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCM) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_LNL_SOCM) }, { } }; MODULE_DEVICE_TABLE(pci, intel_pmc_ssram_telemetry_pci_ids); From be574d5eee9808a422cff0293da9b08c0e434ed0 Mon Sep 17 00:00:00 2001 From: Xi Pardee Date: Tue, 10 Jun 2025 16:04:07 -0700 Subject: [PATCH 233/497] platform/x86/intel/pmc: Add Panther Lake support to Intel PMC SSRAM Telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Panther Lake support to Intel PMC SSRAM Telemetry driver. Signed-off-by: Xi Pardee Link: https://lore.kernel.org/r/20250610230416.622970-2-xi.pardee@linux.intel.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/pmc/core.h | 4 ++++ drivers/platform/x86/intel/pmc/ssram_telemetry.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h index c1db41cb8334..4a94a4ee031e 100644 --- a/drivers/platform/x86/intel/pmc/core.h +++ b/drivers/platform/x86/intel/pmc/core.h @@ -302,6 +302,10 @@ enum ppfear_regs { /* LNL */ #define PMC_DEVID_LNL_SOCM 0xa87f +/* PTL */ +#define PMC_DEVID_PTL_PCDH 0xe37f +#define PMC_DEVID_PTL_PCDP 0xe47f + /* ARL */ #define PMC_DEVID_ARL_SOCM 0x777f #define PMC_DEVID_ARL_SOCS 0xae7f diff --git a/drivers/platform/x86/intel/pmc/ssram_telemetry.c b/drivers/platform/x86/intel/pmc/ssram_telemetry.c index 24d5d01805c8..93579152188e 100644 --- a/drivers/platform/x86/intel/pmc/ssram_telemetry.c +++ b/drivers/platform/x86/intel/pmc/ssram_telemetry.c @@ -188,6 +188,8 @@ static const struct pci_device_id intel_pmc_ssram_telemetry_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_ARL_SOCM) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_LNL_SOCM) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDH) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PMC_DEVID_PTL_PCDP) }, { } }; MODULE_DEVICE_TABLE(pci, intel_pmc_ssram_telemetry_pci_ids); From 0c44b46f51a17baa7ab67de1464427116e9c4eaa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:34:55 +0200 Subject: [PATCH 234/497] platform/x86/intel-uncore-freq: avoid non-literal format string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a string variable in place of a format string causes a W=1 build warning: drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c:61:40: error: format string is not a string literal (potentially insecure) [-Werror,-Wformat-security] 61 | length += sysfs_emit_at(buf, length, agent_name[agent]); | ^~~~~~~~~~~~~~~~~ Use the safer "%s" format string to print it instead. Fixes: b98fa870fce2 ("platform/x86/intel-uncore-freq: Add attributes to show agent types") Signed-off-by: Arnd Bergmann Tested-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20250610093459.2646337-1-arnd@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- .../x86/intel/uncore-frequency/uncore-frequency-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c index 0f8aea18275b..65897fae17df 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-common.c @@ -58,7 +58,7 @@ static ssize_t show_agent_types(struct kobject *kobj, struct kobj_attribute *att if (length) length += sysfs_emit_at(buf, length, " "); - length += sysfs_emit_at(buf, length, agent_name[agent]); + length += sysfs_emit_at(buf, length, "%s", agent_name[agent]); } length += sysfs_emit_at(buf, length, "\n"); From e479da4054875c4cc53a7fb956ebff03d2dac939 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 11 Jun 2025 12:13:06 +0100 Subject: [PATCH 235/497] drm/ssd130x: fix ssd132x_clear_screen() columns The number of columns relates to the width, not the height. Use the correct variable. Signed-off-by: John Keeping Reviewed-by: Javier Martinez Canillas Fixes: fdd591e00a9c ("drm/ssd130x: Add support for the SSD132x OLED controller family") Link: https://lore.kernel.org/r/20250611111307.1814876-1-jkeeping@inmusicbrands.com Signed-off-by: Javier Martinez Canillas --- drivers/gpu/drm/solomon/ssd130x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index dd2006d51c7a..eec43d1a5595 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -974,7 +974,7 @@ static void ssd130x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) static void ssd132x_clear_screen(struct ssd130x_device *ssd130x, u8 *data_array) { - unsigned int columns = DIV_ROUND_UP(ssd130x->height, SSD132X_SEGMENT_WIDTH); + unsigned int columns = DIV_ROUND_UP(ssd130x->width, SSD132X_SEGMENT_WIDTH); unsigned int height = ssd130x->height; memset(data_array, 0, columns * height); From 8ecbad4853f83d9853d7bb0c2d8373afab8851d3 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 23 May 2025 08:40:41 +0200 Subject: [PATCH 236/497] drm/arm/malidp: Silence informational message When checking for unsupported expect an error is printed every time. This spams the log for platforms where this is expected, e.g. ls1028a having a Vivante (etnaviv) GPU and Mali display processor. Signed-off-by: Alexander Stein Signed-off-by: Liviu Dudau Link: https://lore.kernel.org/r/20250523064042.3275926-1-alexander.stein@ew.tq-group.com --- drivers/gpu/drm/arm/malidp_planes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 34547edf1ee3..87f2e5ee8790 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -159,7 +159,7 @@ bool malidp_format_mod_supported(struct drm_device *drm, } if (!fourcc_mod_is_vendor(modifier, ARM)) { - DRM_ERROR("Unknown modifier (not Arm)\n"); + DRM_DEBUG_KMS("Unknown modifier (not Arm)\n"); return false; } From 527c88d8390d6c0358dea4d71696795c05328925 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Jun 2025 09:22:45 +0200 Subject: [PATCH 237/497] ovl: fix debug print in case of mkdir error We want to print the name in case of mkdir failure and now we will get a cryptic (efault) as name. Fixes: c54b386969a5 ("VFS: Change vfs_mkdir() to return the dentry.") Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250612072245.2825938-1-amir73il@gmail.com Signed-off-by: Christian Brauner --- fs/overlayfs/overlayfs.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 8baaba0a3fe5..497323128e5f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -246,9 +246,11 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { - dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry)); - return dentry; + struct dentry *ret; + + ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); + return ret; } static inline int ovl_do_mknod(struct ovl_fs *ofs, From 0b9d62a47149083d581d8b2abb04124b6175cb29 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 11 Jun 2025 09:40:44 -0700 Subject: [PATCH 238/497] fs: unlock the superblock during iterate_supers_type This function takes super_lock in shared mode, so it should release the same lock. Cc: stable@vger.kernel.org # v6.16-rc1 Fixes: af7551cf13cf7f ("super: remove pointless s_root checks") Signed-off-by: "Darrick J. Wong" Link: https://lore.kernel.org/20250611164044.GF6138@frogsfrogsfrogs Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/super.c b/fs/super.c index 21799e213fd7..80418ca8e215 100644 --- a/fs/super.c +++ b/fs/super.c @@ -964,8 +964,10 @@ void iterate_supers_type(struct file_system_type *type, spin_unlock(&sb_lock); locked = super_lock_shared(sb); - if (locked) + if (locked) { f(sb, arg); + super_unlock_shared(sb); + } spin_lock(&sb_lock); if (p) From c538f400fae22725580842deb2bef546701b64bd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 11 Jun 2025 13:53:43 -0700 Subject: [PATCH 239/497] io_uring: consistently use rcu semantics with sqpoll thread The sqpoll thread is dereferenced with rcu read protection in one place, so it needs to be annotated as an __rcu type, and should consistently use rcu helpers for access and assignment to make sparse happy. Since most of the accesses occur under the sqd->lock, we can use rcu_dereference_protected() without declaring an rcu read section. Provide a simple helper to get the thread from a locked context. Fixes: ac0b8b327a5677d ("io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()") Signed-off-by: Keith Busch Link: https://lore.kernel.org/r/20250611205343.1821117-1-kbusch@meta.com [axboe: fold in fix for register.c] Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 4 ++-- io_uring/register.c | 7 +++++-- io_uring/sqpoll.c | 34 ++++++++++++++++++++++++---------- io_uring/sqpoll.h | 8 +++++++- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cf759c172083..4e32f808d07d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2906,7 +2906,7 @@ static __cold void io_ring_exit_work(struct work_struct *work) struct task_struct *tsk; io_sq_thread_park(sqd); - tsk = sqd->thread; + tsk = sqpoll_task_locked(sqd); if (tsk && tsk->io_uring && tsk->io_uring->io_wq) io_wq_cancel_cb(tsk->io_uring->io_wq, io_cancel_ctx_cb, ctx, true); @@ -3142,7 +3142,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) s64 inflight; DEFINE_WAIT(wait); - WARN_ON_ONCE(sqd && sqd->thread != current); + WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current); if (!current->io_uring) return; diff --git a/io_uring/register.c b/io_uring/register.c index cc23a4c205cd..a59589249fce 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (ctx->flags & IORING_SETUP_SQPOLL) { sqd = ctx->sq_data; if (sqd) { + struct task_struct *tsk; + /* * Observe the correct sqd->lock -> ctx->uring_lock * ordering. Fine to drop uring_lock here, we hold @@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, mutex_unlock(&ctx->uring_lock); mutex_lock(&sqd->lock); mutex_lock(&ctx->uring_lock); - if (sqd->thread) - tctx = sqd->thread->io_uring; + tsk = sqpoll_task_locked(sqd); + if (tsk) + tctx = tsk->io_uring; } } else { tctx = current->io_uring; diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 0625a421626f..268d2fbe6160 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -30,7 +30,7 @@ enum { void io_sq_thread_unpark(struct io_sq_data *sqd) __releases(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(sqpoll_task_locked(sqd) == current); /* * Do the dance but not conditional clear_bit() because it'd race with @@ -46,24 +46,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(data_race(sqd->thread) == current); + struct task_struct *tsk; atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } } void io_sq_thread_stop(struct io_sq_data *sqd) { - WARN_ON_ONCE(sqd->thread == current); + struct task_struct *tsk; + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); + tsk = sqpoll_task_locked(sqd); + if (tsk) { + WARN_ON_ONCE(tsk == current); + wake_up_process(tsk); + } mutex_unlock(&sqd->lock); wait_for_completion(&sqd->exited); } @@ -486,7 +494,10 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, goto err_sqpoll; } - sqd->thread = tsk; + mutex_lock(&sqd->lock); + rcu_assign_pointer(sqd->thread, tsk); + mutex_unlock(&sqd->lock); + task_to_put = get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); @@ -514,10 +525,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, int ret = -EINVAL; if (sqd) { + struct task_struct *tsk; + io_sq_thread_park(sqd); /* Don't set affinity for a dying thread */ - if (sqd->thread) - ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask); + tsk = sqpoll_task_locked(sqd); + if (tsk) + ret = io_wq_cpu_affinity(tsk->io_uring, mask); io_sq_thread_unpark(sqd); } diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index 4171666b1cf4..b83dcdec9765 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -8,7 +8,7 @@ struct io_sq_data { /* ctx's that are using this sqd */ struct list_head ctx_list; - struct task_struct *thread; + struct task_struct __rcu *thread; struct wait_queue_head wait; unsigned sq_thread_idle; @@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); + +static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) +{ + return rcu_dereference_protected(sqd->thread, + lockdep_is_held(&sqd->lock)); +} From c0c7fa4e7a512006710c8e4d6b6f7b40c9f786cd Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 10 Jun 2025 14:09:35 +0200 Subject: [PATCH 240/497] docs: arm64: Fix ICC_SRE_EL2 register typo in booting.rst Fix trivial ICC_SRE_EL2 register spelling typo in booting.rst. Signed-off-by: Lorenzo Pieralisi Cc: Jonathan Corbet Cc: Will Deacon CC: Catalin Marinas Link: https://lore.kernel.org/r/20250610120935.852034-1-lpieralisi@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/booting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst index dee7b6de864f..ee9b790c0d72 100644 --- a/Documentation/arch/arm64/booting.rst +++ b/Documentation/arch/arm64/booting.rst @@ -234,7 +234,7 @@ Before jumping into the kernel, the following conditions must be met: - If the kernel is entered at EL1: - - ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1 + - ICC_SRE_EL2.Enable (bit 3) must be initialised to 0b1 - ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1. - The DT or ACPI tables must describe a GICv3 interrupt controller. From 650768c512faba8070bf4cfbb28c95eb5cd203f3 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Tue, 27 May 2025 13:56:33 +0530 Subject: [PATCH 241/497] arm64: Restrict pagetable teardown to avoid false warning Commit 9c006972c3fe ("arm64: mmu: drop pXd_present() checks from pXd_free_pYd_table()") removes the pxd_present() checks because the caller checks pxd_present(). But, in case of vmap_try_huge_pud(), the caller only checks pud_present(); pud_free_pmd_page() recurses on each pmd through pmd_free_pte_page(), wherein the pmd may be none. Thus it is possible to hit a warning in the latter, since pmd_none => !pmd_table(). Thus, add a pmd_present() check in pud_free_pmd_page(). This problem was found by code inspection. Fixes: 9c006972c3fe ("arm64: mmu: drop pXd_present() checks from pXd_free_pYd_table()") Cc: stable@vger.kernel.org Reported-by: Ryan Roberts Acked-by: David Hildenbrand Signed-off-by: Dev Jain Reviewed-by: Catalin Marinas Reviewed-by: Anshuman Khandual Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20250527082633.61073-1-dev.jain@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 8fcf59ba39db..00ab1d648db6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1305,7 +1305,8 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) next = addr; end = addr + PUD_SIZE; do { - pmd_free_pte_page(pmdp, next); + if (pmd_present(pmdp_get(pmdp))) + pmd_free_pte_page(pmdp, next); } while (pmdp++, next += PMD_SIZE, next != end); pud_clear(pudp); From d4e6cb324dcc952618fec6b25aa3fc7bfc2750b4 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 6 Jun 2025 11:43:20 +0200 Subject: [PATCH 242/497] net: phy: phy_caps: Don't skip better duplex macth on non-exact match When performing a non-exact phy_caps lookup, we are looking for a supported mode that matches as closely as possible the passed speed/duplex. Blamed patch broke that logic by returning a match too early in case the caller asks for half-duplex, as a full-duplex linkmode may match first, and returned as a non-exact match without even trying to mach on half-duplex modes. Reported-by: Jijie Shao Closes: https://lore.kernel.org/netdev/20250603102500.4ec743cf@fedora/T/#m22ed60ca635c67dc7d9cbb47e8995b2beb5c1576 Tested-by: Jijie Shao Reviewed-by: Larysa Zaremba Fixes: fc81e257d19f ("net: phy: phy_caps: Allow looking-up link caps based on speed and duplex") Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20250606094321.483602-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_caps.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy_caps.c b/drivers/net/phy/phy_caps.c index 703321689726..38417e288611 100644 --- a/drivers/net/phy/phy_caps.c +++ b/drivers/net/phy/phy_caps.c @@ -188,6 +188,9 @@ phy_caps_lookup_by_linkmode_rev(const unsigned long *linkmodes, bool fdx_only) * When @exact is not set, we return either an exact match, or matching capabilities * at lower speed, or the lowest matching speed, or NULL. * + * Non-exact matches will try to return an exact speed and duplex match, but may + * return matching capabilities with same speed but a different duplex. + * * Returns: a matched link_capabilities according to the above process, NULL * otherwise. */ @@ -195,7 +198,7 @@ const struct link_capabilities * phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, bool exact) { - const struct link_capabilities *lcap, *last = NULL; + const struct link_capabilities *lcap, *match = NULL, *last = NULL; for_each_link_caps_desc_speed(lcap) { if (linkmode_intersects(lcap->linkmodes, supported)) { @@ -204,16 +207,19 @@ phy_caps_lookup(int speed, unsigned int duplex, const unsigned long *supported, if (lcap->speed == speed && lcap->duplex == duplex) { return lcap; } else if (!exact) { - if (lcap->speed <= speed) - return lcap; + if (!match && lcap->speed <= speed) + match = lcap; + + if (lcap->speed < speed) + break; } } } - if (!exact) - return last; + if (!match && !exact) + match = last; - return NULL; + return match; } EXPORT_SYMBOL_GPL(phy_caps_lookup); From 7ca52541c05c832d32b112274f81a985101f9ba8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 08:35:01 +0000 Subject: [PATCH 243/497] net_sched: sch_sfq: reject invalid perturb period Gerrard Tai reported that SFQ perturb_period has no range check yet, and this can be used to trigger a race condition fixed in a separate patch. We want to make sure ctl->perturb_period * HZ will not overflow and is positive. Tested: tc qd add dev lo root sfq perturb -10 # negative value : error Error: sch_sfq: invalid perturb period. tc qd add dev lo root sfq perturb 1000000000 # too big : error Error: sch_sfq: invalid perturb period. tc qd add dev lo root sfq perturb 2000000 # acceptable value tc -s -d qd sh dev lo qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Gerrard Tai Signed-off-by: Eric Dumazet Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_sfq.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 77fa02f2bfcd..a8cca549b5a2 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); return -EINVAL; } + + if (ctl->perturb_period < 0 || + ctl->perturb_period > INT_MAX / HZ) { + NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); + return -EINVAL; + } + perturb_period = ctl->perturb_period * HZ; + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) return -EINVAL; @@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, headdrop = q->headdrop; maxdepth = q->maxdepth; maxflows = q->maxflows; - perturb_period = q->perturb_period; quantum = q->quantum; flags = q->flags; /* update and validate configuration */ if (ctl->quantum) quantum = ctl->quantum; - perturb_period = ctl->perturb_period * HZ; if (ctl->flows) maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); if (ctl->divisor) { From d35acc1be3480505b5931f17e4ea9b7617fea4d3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:11 +0000 Subject: [PATCH 244/497] net_sched: prio: fix a race in prio_tune() Gerrard Tai reported a race condition in PRIO, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: 7b8e0b6e6599 ("net: sched: prio: delay destroying child qdiscs on change") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_prio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index cc30f7a32f1a..9e2b9a490db2 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -211,7 +211,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i = q->bands; i < oldbands; i++) - qdisc_tree_flush_backlog(q->queues[i]); + qdisc_purge_queue(q->queues[i]); for (i = oldbands; i < q->bands; i++) { q->queues[i] = queues[i]; From 85a3e0ede38450ea3053b8c45d28cf55208409b8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:12 +0000 Subject: [PATCH 245/497] net_sched: red: fix a race in __red_change() Gerrard Tai reported a race condition in RED, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: 0c8d13ac9607 ("net: sched: red: delay destroying child qdisc on replace") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_red.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 1ba3e0bba54f..4696c893cf55 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -285,7 +285,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, q->userbits = userbits; q->limit = ctl->limit; if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old_child = q->qdisc; q->qdisc = child; } From 43eb466041216d25dedaef1c383ad7bd89929cbc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:13 +0000 Subject: [PATCH 246/497] net_sched: tbf: fix a race in tbf_change() Gerrard Tai reported a race condition in TBF, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: b05972f01e7d ("net: sched: tbf: don't call qdisc_put() while holding tree lock") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Cc: Zhengchao Shao Link: https://patch.msgid.link/20250611111515.1983366-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_tbf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index dc26b22d53c7..4c977f049670 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -452,7 +452,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); if (child) { - qdisc_tree_flush_backlog(q->qdisc); + qdisc_purge_queue(q->qdisc); old = q->qdisc; q->qdisc = child; } From d92adacdd8c2960be856e0b82acc5b7c5395fddb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:14 +0000 Subject: [PATCH 247/497] net_sched: ets: fix a race in ets_qdisc_change() Gerrard Tai reported a race condition in ETS, whenever SFQ perturb timer fires at the wrong time. The race is as follows: CPU 0 CPU 1 [1]: lock root [2]: qdisc_tree_flush_backlog() [3]: unlock root | | [5]: lock root | [6]: rehash | [7]: qdisc_tree_reduce_backlog() | [4]: qdisc_put() This can be abused to underflow a parent's qlen. Calling qdisc_purge_queue() instead of qdisc_tree_flush_backlog() should fix the race, because all packets will be purged from the qdisc before releasing the lock. Fixes: b05972f01e7d ("net: sched: tbf: don't call qdisc_put() while holding tree lock") Reported-by: Gerrard Tai Suggested-by: Gerrard Tai Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_ets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 2c069f0181c6..037f764822b9 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -661,7 +661,7 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, for (i = q->nbands; i < oldbands; i++) { if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) list_del_init(&q->classes[i].alist); - qdisc_tree_flush_backlog(q->classes[i].qdisc); + qdisc_purge_queue(q->classes[i].qdisc); } WRITE_ONCE(q->nstrict, nstrict); memcpy(q->prio2band, priomap, sizeof(priomap)); From adcaa890c7a4a91a422168d8fb629183fff07b2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Jun 2025 11:15:15 +0000 Subject: [PATCH 248/497] net_sched: remove qdisc_tree_flush_backlog() This function is no longer used after the four prior fixes. Given all prior uses were wrong, it seems better to remove it. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250611111515.1983366-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/sch_generic.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 629368ab2787..638948be4c50 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -973,14 +973,6 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch, __u32 *qlen, *backlog = qstats.backlog; } -static inline void qdisc_tree_flush_backlog(struct Qdisc *sch) -{ - __u32 qlen, backlog; - - qdisc_qstats_qlen_backlog(sch, &qlen, &backlog); - qdisc_tree_reduce_backlog(sch, qlen, backlog); -} - static inline void qdisc_purge_queue(struct Qdisc *sch) { __u32 qlen, backlog; From 9337c54401a5bb6ac3c9f6c71dd2a9130cfba82e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 11 Jun 2025 14:40:04 +0200 Subject: [PATCH 249/497] veth: prevent NULL pointer dereference in veth_xdp_rcv The veth peer device is RCU protected, but when the peer device gets deleted (veth_dellink) then the pointer is assigned NULL (via RCU_INIT_POINTER). This patch adds a necessary NULL check in veth_xdp_rcv when accessing the veth peer net_device. This fixes a bug introduced in commit dc82a33297fc ("veth: apply qdisc backpressure on full ptr_ring to reduce TX drops"). The bug is a race and only triggers when having inflight packets on a veth that is being deleted. Reported-by: Ihor Solodrai Closes: https://lore.kernel.org/all/fecfcad0-7a16-42b8-bff2-66ee83a6e5c4@linux.dev/ Reported-by: syzbot+c4c7bf27f6b0c4bd97fe@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/683da55e.a00a0220.d8eae.0052.GAE@google.com/ Fixes: dc82a33297fc ("veth: apply qdisc backpressure on full ptr_ring to reduce TX drops") Signed-off-by: Jesper Dangaard Brouer Acked-by: Ihor Solodrai Link: https://patch.msgid.link/174964557873.519608.10855046105237280978.stgit@firesoul Signed-off-by: Jakub Kicinski --- drivers/net/veth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index e58a0f1b5c5b..a3046142cb8e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -909,7 +909,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, /* NAPI functions as RCU section */ peer_dev = rcu_dereference_check(priv->peer, rcu_read_lock_bh_held()); - peer_txq = netdev_get_tx_queue(peer_dev, queue_idx); + peer_txq = peer_dev ? netdev_get_tx_queue(peer_dev, queue_idx) : NULL; for (i = 0; i < budget; i++) { void *ptr = __ptr_ring_consume(&rq->xdp_ring); @@ -959,7 +959,7 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, rq->stats.vs.xdp_packets += done; u64_stats_update_end(&rq->stats.syncp); - if (unlikely(netif_tx_queue_stopped(peer_txq))) + if (peer_txq && unlikely(netif_tx_queue_stopped(peer_txq))) netif_tx_wake_queue(peer_txq); return done; From 1619bdf4389c829f16af5c7d5b4fa5f1673614d7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 11 Jun 2025 16:14:32 +0300 Subject: [PATCH 250/497] net/mlx5: HWS, Add error checking to hws_bwc_rule_complex_hash_node_get() Check for if ida_alloc() or rhashtable_lookup_get_insert_fast() fails. Fixes: 17e0accac577 ("net/mlx5: HWS, support complex matchers") Signed-off-by: Dan Carpenter Reviewed-by: Yevgeny Kliteynik Link: https://patch.msgid.link/aEmBONjyiF6z5yCV@stanley.mountain Signed-off-by: Jakub Kicinski --- .../mlx5/core/steering/hws/bwc_complex.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index 70768953a4f6..ca7501c57468 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -1070,7 +1070,7 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node; struct rhashtable *refcount_hash; - int i; + int ret, i; bwc_rule->complex_hash_node = NULL; @@ -1078,7 +1078,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(!node)) return -ENOMEM; - node->tag = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + if (ret < 0) + goto err_free_node; + node->tag = ret; + refcount_set(&node->refcount, 1); /* Clear match buffer - turn off all the unrelated fields @@ -1094,6 +1098,11 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, old_node = rhashtable_lookup_get_insert_fast(refcount_hash, &node->hash_node, hws_refcount_hash); + if (IS_ERR(old_node)) { + ret = PTR_ERR(old_node); + goto err_free_ida; + } + if (old_node) { /* Rule with the same tag already exists - update refcount */ refcount_inc(&old_node->refcount); @@ -1112,6 +1121,12 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, bwc_rule->complex_hash_node = node; return 0; + +err_free_ida: + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); +err_free_node: + kfree(node); + return ret; } static void From 1264971017b4d7141352a7fe29021bdfce5d885d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 11 Jun 2025 10:46:43 -0700 Subject: [PATCH 251/497] net: drv: netdevsim: don't napi_complete() from netpoll netdevsim supports netpoll. Make sure we don't call napi_complete() from it, since it may not be scheduled. Breno reports hitting a warning in napi_complete_done(): WARNING: CPU: 14 PID: 104 at net/core/dev.c:6592 napi_complete_done+0x2cc/0x560 __napi_poll+0x2d8/0x3a0 handle_softirqs+0x1fe/0x710 This is presumably after netpoll stole the SCHED bit prematurely. Reported-by: Breno Leitao Fixes: 3762ec05a9fb ("netdevsim: add NAPI support") Tested-by: Breno Leitao Link: https://patch.msgid.link/20250611174643.2769263-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index af545d42961c..fa5fbd97ad69 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -371,7 +371,8 @@ static int nsim_poll(struct napi_struct *napi, int budget) int done; done = nsim_rcv(rq, budget); - napi_complete(napi); + if (done < budget) + napi_complete_done(napi, done); return done; } From b3979e3d2fc92bf1a2da614fc383b75b9859ef58 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Jun 2025 12:35:02 -0700 Subject: [PATCH 252/497] ipv6: Move fib6_config_validate() to ip6_route_add(). syzkaller created an IPv6 route from a malformed packet, which has a prefix len > 128, triggering the splat below. [0] This is a similar issue fixed by commit 586ceac9acb7 ("ipv6: Restore fib6_config validation for SIOCADDRT."). The cited commit removed fib6_config validation from some callers of ip6_add_route(). Let's move the validation back to ip6_route_add() and ip6_route_multipath_add(). [0]: UBSAN: array-index-out-of-bounds in ./include/net/ipv6.h:616:34 index 20 is out of range for type '__u8 [16]' CPU: 1 UID: 0 PID: 7444 Comm: syz.0.708 Not tainted 6.16.0-rc1-syzkaller-g19272b37aa4f #0 PREEMPT Hardware name: riscv-virtio,qemu (DT) Call Trace: [] dump_backtrace+0x2e/0x3c arch/riscv/kernel/stacktrace.c:132 [] show_stack+0x30/0x3c arch/riscv/kernel/stacktrace.c:138 [] __dump_stack lib/dump_stack.c:94 [inline] [] dump_stack_lvl+0x12e/0x1a6 lib/dump_stack.c:120 [] dump_stack+0x1c/0x24 lib/dump_stack.c:129 [] ubsan_epilogue+0x14/0x46 lib/ubsan.c:233 [] __ubsan_handle_out_of_bounds+0xf6/0xf8 lib/ubsan.c:455 [] ipv6_addr_prefix include/net/ipv6.h:616 [inline] [] ip6_route_info_create+0x8f8/0x96e net/ipv6/route.c:3793 [] ip6_route_add+0x2a/0x1aa net/ipv6/route.c:3889 [] addrconf_prefix_route+0x2c4/0x4e8 net/ipv6/addrconf.c:2487 [] addrconf_prefix_rcv+0x1720/0x1e62 net/ipv6/addrconf.c:2878 [] ndisc_router_discovery+0x1a06/0x3504 net/ipv6/ndisc.c:1570 [] ndisc_rcv+0x500/0x600 net/ipv6/ndisc.c:1874 [] icmpv6_rcv+0x145e/0x1e0a net/ipv6/icmp.c:988 [] ip6_protocol_deliver_rcu+0x18a/0x1976 net/ipv6/ip6_input.c:436 [] ip6_input_finish+0xf4/0x174 net/ipv6/ip6_input.c:480 [] NF_HOOK include/linux/netfilter.h:317 [inline] [] NF_HOOK include/linux/netfilter.h:311 [inline] [] ip6_input+0x16a/0x70c net/ipv6/ip6_input.c:491 [] ip6_mc_input+0x5c8/0x1268 net/ipv6/ip6_input.c:588 [] dst_input include/net/dst.h:469 [inline] [] ip6_rcv_finish net/ipv6/ip6_input.c:79 [inline] [] NF_HOOK include/linux/netfilter.h:317 [inline] [] NF_HOOK include/linux/netfilter.h:311 [inline] [] ipv6_rcv+0x5ae/0x6e0 net/ipv6/ip6_input.c:309 [] __netif_receive_skb_one_core+0x106/0x16e net/core/dev.c:5977 [] __netif_receive_skb+0x2c/0x144 net/core/dev.c:6090 [] netif_receive_skb_internal net/core/dev.c:6176 [inline] [] netif_receive_skb+0x1aa/0xbf2 net/core/dev.c:6235 [] tun_rx_batched.isra.0+0x430/0x686 drivers/net/tun.c:1485 [] tun_get_user+0x2952/0x3d6c drivers/net/tun.c:1938 [] tun_chr_write_iter+0xc4/0x21c drivers/net/tun.c:1984 [] new_sync_write fs/read_write.c:593 [inline] [] vfs_write+0x56c/0xa9a fs/read_write.c:686 [] ksys_write+0x126/0x228 fs/read_write.c:738 [] __do_sys_write fs/read_write.c:749 [inline] [] __se_sys_write fs/read_write.c:746 [inline] [] __riscv_sys_write+0x6e/0x94 fs/read_write.c:746 [] syscall_handler+0x94/0x118 arch/riscv/include/asm/syscall.h:112 [] do_trap_ecall_u+0x396/0x530 arch/riscv/kernel/traps.c:341 [] handle_exception+0x146/0x152 arch/riscv/kernel/entry.S:197 Fixes: fa76c1674f2e ("ipv6: Move some validation from ip6_route_info_create() to rtm_to_fib6_config().") Reported-by: syzbot+4c2358694722d304c44e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6849b8c3.a00a0220.1eb5f5.00f0.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250611193551.2999991-1-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 110 +++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0143262094b0..79c8f1acf8a3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3737,6 +3737,53 @@ void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) } } +static int fib6_config_validate(struct fib6_config *cfg, + struct netlink_ext_ack *extack) +{ + /* RTF_PCPU is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_PCPU) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); + goto errout; + } + + /* RTF_CACHE is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_CACHE) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); + goto errout; + } + + if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto errout; + } + + if (cfg->fc_dst_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length"); + goto errout; + } + +#ifdef CONFIG_IPV6_SUBTREES + if (cfg->fc_src_len > 128) { + NL_SET_ERR_MSG(extack, "Invalid source address length"); + goto errout; + } + + if (cfg->fc_nh_id && cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + goto errout; + } +#else + if (cfg->fc_src_len) { + NL_SET_ERR_MSG(extack, + "Specifying source address requires IPV6_SUBTREES to be enabled"); + goto errout; + } +#endif + return 0; +errout: + return -EINVAL; +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) @@ -3886,6 +3933,10 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct fib6_info *rt; int err; + err = fib6_config_validate(cfg, extack); + if (err) + return err; + rt = ip6_route_info_create(cfg, gfp_flags, extack); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -4479,53 +4530,6 @@ void rt6_purge_dflt_routers(struct net *net) rcu_read_unlock(); } -static int fib6_config_validate(struct fib6_config *cfg, - struct netlink_ext_ack *extack) -{ - /* RTF_PCPU is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_PCPU) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); - goto errout; - } - - /* RTF_CACHE is an internal flag; can not be set by userspace */ - if (cfg->fc_flags & RTF_CACHE) { - NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); - goto errout; - } - - if (cfg->fc_type > RTN_MAX) { - NL_SET_ERR_MSG(extack, "Invalid route type"); - goto errout; - } - - if (cfg->fc_dst_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid prefix length"); - goto errout; - } - -#ifdef CONFIG_IPV6_SUBTREES - if (cfg->fc_src_len > 128) { - NL_SET_ERR_MSG(extack, "Invalid source address length"); - goto errout; - } - - if (cfg->fc_nh_id && cfg->fc_src_len) { - NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); - goto errout; - } -#else - if (cfg->fc_src_len) { - NL_SET_ERR_MSG(extack, - "Specifying source address requires IPV6_SUBTREES to be enabled"); - goto errout; - } -#endif - return 0; -errout: - return -EINVAL; -} - static void rtmsg_to_fib6_config(struct net *net, struct in6_rtmsg *rtmsg, struct fib6_config *cfg) @@ -4563,10 +4567,6 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) switch (cmd) { case SIOCADDRT: - err = fib6_config_validate(&cfg, NULL); - if (err) - break; - /* Only do the default setting of fc_metric in route adding */ if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; @@ -5402,6 +5402,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, int nhn = 0; int err; + err = fib6_config_validate(cfg, extack); + if (err) + return err; + replace = (cfg->fc_nlinfo.nlh && (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); @@ -5636,10 +5640,6 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; - err = fib6_config_validate(&cfg, extack); - if (err) - return err; - if (cfg.fc_metric == 0) cfg.fc_metric = IP6_RT_PRIO_USER; From 43fb2b30eea7cfc40214484935b026ec29838a91 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 11 Jun 2025 13:27:35 -0700 Subject: [PATCH 253/497] af_unix: Allow passing cred for embryo without SO_PASSCRED/SO_PASSPIDFD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before the cited commit, the kernel unconditionally embedded SCM credentials to skb for embryo sockets even when both the sender and listener disabled SO_PASSCRED and SO_PASSPIDFD. Now, the credentials are added to skb only when configured by the sender or the listener. However, as reported in the link below, it caused a regression for some programs that assume credentials are included in every skb, but sometimes not now. The only problematic scenario would be that a socket starts listening before setting the option. Then, there will be 2 types of non-small race window, where a client can send skb without credentials, which the peer receives as an "invalid" message (and aborts the connection it seems ?): Client Server ------ ------ s1.listen() <-- No SO_PASS{CRED,PIDFD} s2.connect() s2.send() <-- w/o cred s1.setsockopt(SO_PASS{CRED,PIDFD}) s2.send() <-- w/ cred or Client Server ------ ------ s1.listen() <-- No SO_PASS{CRED,PIDFD} s2.connect() s2.send() <-- w/o cred s3, _ = s1.accept() <-- Inherit cred options s2.send() <-- w/o cred but not set yet s3.setsockopt(SO_PASS{CRED,PIDFD}) s2.send() <-- w/ cred It's unfortunate that buggy programs depend on the behaviour, but let's restore the previous behaviour. Fixes: 3f84d577b79d ("af_unix: Inherit sk_flags at connect().") Reported-by: Jacek Łuczak Closes: https://lore.kernel.org/all/68d38b0b-1666-4974-85d4-15575789c8d4@gmail.com/ Signed-off-by: Kuniyuki Iwashima Tested-by: Christian Heusel Tested-by: André Almeida Tested-by: Jacek Łuczak Link: https://patch.msgid.link/20250611202758.3075858-1-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- net/unix/af_unix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2e2e9997a68e..22e170fb5dda 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1971,7 +1971,8 @@ static void unix_maybe_add_creds(struct sk_buff *skb, const struct sock *sk, if (UNIXCB(skb).pid) return; - if (unix_may_passcred(sk) || unix_may_passcred(other)) { + if (unix_may_passcred(sk) || unix_may_passcred(other) || + !other->sk_socket) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } From d78ebc772c7ceccf6e655ddb93099f49a1268af4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 12 Jun 2025 10:19:57 +0300 Subject: [PATCH 254/497] net: ethtool: Don't check if RSS context exists in case of context 0 Context 0 (default context) always exists, there is no need to check whether it exists or not when adding a flow steering rule. The existing check fails when creating a flow steering rule for context 0 as it is not stored in the rss_ctx xarray. For example: $ ethtool --config-ntuple eth2 flow-type tcp4 dst-ip 194.237.147.23 dst-port 19983 context 0 loc 618 rmgr: Cannot insert RX class rule: Invalid argument Cannot insert classification rule An example usecase for this could be: - A high-priority rule (loc 0) directing specific port traffic to context 0. - A low-priority rule (loc 1) directing all other TCP traffic to context 1. This is a user-visible regression that was caught in our testing environment, it was not reported by a user yet. Fixes: de7f7582dff2 ("net: ethtool: prevent flow steering to RSS contexts which don't exist") Reviewed-by: Tariq Toukan Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Reviewed-by: Joe Damato Reviewed-by: Edward Cree Link: https://patch.msgid.link/20250612071958.1696361-2-gal@nvidia.com Signed-off-by: Jakub Kicinski --- net/ethtool/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 39ec920f5de7..71c828d0bf31 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1083,7 +1083,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; - if (!xa_load(&dev->ethtool->rss_ctx, info.rss_context)) + if (info.rss_context && + !xa_load(&dev->ethtool->rss_ctx, info.rss_context)) return -EINVAL; } From 56c5d291e88538621029e5c7c5f60540d37846a8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 12 Jun 2025 10:19:58 +0300 Subject: [PATCH 255/497] selftests: drv-net: rss_ctx: Add test for ntuple rules targeting default RSS context Add test_rss_default_context_rule() to verify that ntuple rules can correctly direct traffic to the default RSS context (context 0). The test creates two ntuple rules with explicit location priorities: - A high-priority rule (loc 0) directing specific port traffic to context 0. - A low-priority rule (loc 1) directing all other TCP traffic to context 1. This validates that: 1. Rules targeting the default context function properly. 2. Traffic steering works as expected when mixing default and additional RSS contexts. The test was written by AI, and reviewed by humans. Reviewed-by: Nimrod Oren Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20250612071958.1696361-3-gal@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/hw/rss_ctx.py | 59 ++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca60ae325c22..7bb552f8b182 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -747,6 +747,62 @@ def test_rss_ntuple_addition(cfg): 'noise' : (0,) }) +def test_rss_default_context_rule(cfg): + """ + Allocate a port, direct this port to context 0, then create a new RSS + context and steer all TCP traffic to it (context 1). Verify that: + * Traffic to the specific port continues to use queues of the main + context (0/1). + * Traffic to any other TCP port is redirected to the new context + (queues 2/3). + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + # Use queues 0 and 1 for the main context + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + # Create a new RSS context that uses queues 2 and 3 + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # Generic low-priority rule: redirect all TCP traffic to the new context. + # Give it an explicit higher location number (lower priority). + flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1" + ethtool(f"-N {cfg.ifname} {flow_generic}") + defer(ethtool, f"-N {cfg.ifname} delete 1") + + # Specific high-priority rule for a random port that should stay on context 0. + # Assign loc 0 so it is evaluated before the generic rule. + port_main = rand_port() + flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" + ethtool(f"-N {cfg.ifname} {flow_main}") + defer(ethtool, f"-N {cfg.ifname} delete 0") + + _ntuple_rule_check(cfg, 1, ctx_id) + + # Verify that traffic matching the specific rule still goes to queues 0/1 + _send_traffic_check(cfg, port_main, "context 0", + { 'target': (0, 1), + 'empty' : (2, 3) }) + + # And that traffic for any other port is steered to the new context + port_other = rand_port() + _send_traffic_check(cfg, port_other, f"context {ctx_id}", + { 'target': (2, 3), + 'noise' : (0, 1) }) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.context_cnt = None @@ -760,7 +816,8 @@ def main() -> None: test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, test_flow_add_context_missing, - test_delete_rss_context_busy, test_rss_ntuple_addition], + test_delete_rss_context_busy, test_rss_ntuple_addition, + test_rss_default_context_rule], args=(cfg, )) ksft_exit() From 9c7632faad434c98f1f2cc06f3647a5a5d05ddbf Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 4 Jun 2025 08:03:05 -0700 Subject: [PATCH 256/497] drm/xe/lrc: Use a temporary buffer for WA BB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case the BO is in iomem, we can't simply take the vaddr and write to it. Instead, prepare a separate buffer that is later copied into io memory. Right now it's just a few words that could be using xe_map_write32(), but the intention is to grow the WA BB for other uses. Fixes: 617d824c5323 ("drm/xe: Add WA BB to capture active context utilization") Cc: Umesh Nerlige Ramappa Cc: Tvrtko Ursulin Reviewed-by: Matthew Brost Reviewed-by: Umesh Nerlige Ramappa Link: https://lore.kernel.org/r/20250604-wa-bb-fix-v1-1-0dfc5dafcef0@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit ef48715b2d3df17c060e23b9aa636af3d95652f8) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_lrc.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 63d74e27f54c..bf7c3981897d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -941,11 +941,18 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. */ #define CONTEXT_ACTIVE 1ULL -static void xe_lrc_setup_utilization(struct xe_lrc *lrc) +static int xe_lrc_setup_utilization(struct xe_lrc *lrc) { - u32 *cmd; + u32 *cmd, *buf = NULL; - cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + if (lrc->bb_per_ctx_bo->vmap.is_iomem) { + buf = kmalloc(lrc->bb_per_ctx_bo->size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + cmd = buf; + } else { + cmd = lrc->bb_per_ctx_bo->vmap.vaddr; + } *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET; *cmd++ = ENGINE_ID(0).addr; @@ -966,9 +973,16 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc) *cmd++ = MI_BATCH_BUFFER_END; + if (buf) { + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bb_per_ctx_bo->vmap, 0, + buf, (cmd - buf) * sizeof(*cmd)); + kfree(buf); + } + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1); + return 0; } #define PVC_CTX_ASID (0x2e + 1) @@ -1125,7 +1139,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, map = __xe_lrc_start_seqno_map(lrc); xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - xe_lrc_setup_utilization(lrc); + err = xe_lrc_setup_utilization(lrc); + if (err) + goto err_lrc_finish; return 0; From d2be3270f40b1330f8c1c9512c59dd085a758ac0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 11 Jun 2025 17:28:13 +0100 Subject: [PATCH 257/497] arm64/gcs: Don't call gcs_free() during flush_gcs() Currently we call gcs_free() during flush_gcs() to reset the thread state for GCS. This includes unmapping any kernel allocated GCS, but this is redundant when doing a flush_thread() since we are reinitialising the thread memory too. Inline the reinitialisation of the thread struct. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20250611-arm64-gcs-flush-thread-v1-1-cc26feeddabd@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a5ca15daeb8a..5954cec19660 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -288,7 +288,9 @@ static void flush_gcs(void) if (!system_supports_gcs()) return; - gcs_free(current); + current->thread.gcspr_el0 = 0; + current->thread.gcs_base = 0; + current->thread.gcs_size = 0; current->thread.gcs_el0_mode = 0; write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1); write_sysreg_s(0, SYS_GCSPR_EL0); From 39dfc971e42d886e7df01371cd1bef505076d84c Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Wed, 4 Jun 2025 00:55:33 +0000 Subject: [PATCH 258/497] arm64/ptrace: Fix stack-out-of-bounds read in regs_get_kernel_stack_nth() KASAN reports a stack-out-of-bounds read in regs_get_kernel_stack_nth(). Call Trace: [ 97.283505] BUG: KASAN: stack-out-of-bounds in regs_get_kernel_stack_nth+0xa8/0xc8 [ 97.284677] Read of size 8 at addr ffff800089277c10 by task 1.sh/2550 [ 97.285732] [ 97.286067] CPU: 7 PID: 2550 Comm: 1.sh Not tainted 6.6.0+ #11 [ 97.287032] Hardware name: linux,dummy-virt (DT) [ 97.287815] Call trace: [ 97.288279] dump_backtrace+0xa0/0x128 [ 97.288946] show_stack+0x20/0x38 [ 97.289551] dump_stack_lvl+0x78/0xc8 [ 97.290203] print_address_description.constprop.0+0x84/0x3c8 [ 97.291159] print_report+0xb0/0x280 [ 97.291792] kasan_report+0x84/0xd0 [ 97.292421] __asan_load8+0x9c/0xc0 [ 97.293042] regs_get_kernel_stack_nth+0xa8/0xc8 [ 97.293835] process_fetch_insn+0x770/0xa30 [ 97.294562] kprobe_trace_func+0x254/0x3b0 [ 97.295271] kprobe_dispatcher+0x98/0xe0 [ 97.295955] kprobe_breakpoint_handler+0x1b0/0x210 [ 97.296774] call_break_hook+0xc4/0x100 [ 97.297451] brk_handler+0x24/0x78 [ 97.298073] do_debug_exception+0xac/0x178 [ 97.298785] el1_dbg+0x70/0x90 [ 97.299344] el1h_64_sync_handler+0xcc/0xe8 [ 97.300066] el1h_64_sync+0x78/0x80 [ 97.300699] kernel_clone+0x0/0x500 [ 97.301331] __arm64_sys_clone+0x70/0x90 [ 97.302084] invoke_syscall+0x68/0x198 [ 97.302746] el0_svc_common.constprop.0+0x11c/0x150 [ 97.303569] do_el0_svc+0x38/0x50 [ 97.304164] el0_svc+0x44/0x1d8 [ 97.304749] el0t_64_sync_handler+0x100/0x130 [ 97.305500] el0t_64_sync+0x188/0x190 [ 97.306151] [ 97.306475] The buggy address belongs to stack of task 1.sh/2550 [ 97.307461] and is located at offset 0 in frame: [ 97.308257] __se_sys_clone+0x0/0x138 [ 97.308910] [ 97.309241] This frame has 1 object: [ 97.309873] [48, 184) 'args' [ 97.309876] [ 97.310749] The buggy address belongs to the virtual mapping at [ 97.310749] [ffff800089270000, ffff800089279000) created by: [ 97.310749] dup_task_struct+0xc0/0x2e8 [ 97.313347] [ 97.313674] The buggy address belongs to the physical page: [ 97.314604] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x14f69a [ 97.315885] flags: 0x15ffffe00000000(node=1|zone=2|lastcpupid=0xfffff) [ 97.316957] raw: 015ffffe00000000 0000000000000000 dead000000000122 0000000000000000 [ 97.318207] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 97.319445] page dumped because: kasan: bad access detected [ 97.320371] [ 97.320694] Memory state around the buggy address: [ 97.321511] ffff800089277b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 97.322681] ffff800089277b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 97.323846] >ffff800089277c00: 00 00 f1 f1 f1 f1 f1 f1 00 00 00 00 00 00 00 00 [ 97.325023] ^ [ 97.325683] ffff800089277c80: 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 f3 f3 [ 97.326856] ffff800089277d00: f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 00 00 This issue seems to be related to the behavior of some gcc compilers and was also fixed on the s390 architecture before: commit d93a855c31b7 ("s390/ptrace: Avoid KASAN false positives in regs_get_kernel_stack_nth()") As described in that commit, regs_get_kernel_stack_nth() has confirmed that `addr` is on the stack, so reading the value at `*addr` should be allowed. Use READ_ONCE_NOCHECK() helper to silence the KASAN check for this case. Fixes: 0a8ea52c3eb1 ("arm64: Add HAVE_REGS_AND_STACK_ACCESS_API feature") Signed-off-by: Tengda Wu Link: https://lore.kernel.org/r/20250604005533.1278992-1-wutengda@huaweicloud.com [will: Use '*addr' as the argument to READ_ONCE_NOCHECK()] Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a360e52db02f..ee94b72bf8fb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -141,7 +141,7 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; + return READ_ONCE_NOCHECK(*addr); else return 0; } From b64af6bcd3b0f3fc633d6a70adb0991737abfef4 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 12 Jun 2025 12:45:04 -0300 Subject: [PATCH 259/497] smb: client: fix perf regression with deferred closes Customer reported that one of their applications started failing to open files with STATUS_INSUFFICIENT_RESOURCES due to NetApp server hitting the maximum number of opens to same file that it would allow for a single client connection. It turned out the client was failing to reuse open handles with deferred closes because matching ->f_flags directly without masking off O_CREAT|O_EXCL|O_TRUNC bits first broke the comparision and then client ended up with thousands of deferred closes to same file. Those bits are already satisfied on the original open, so no need to check them against existing open handles. Reproducer: #include #include #include #include #include #include #define NR_THREADS 4 #define NR_ITERATIONS 2500 #define TEST_FILE "/mnt/1/test/dir/foo" static char buf[64]; static void *worker(void *arg) { int i, j; int fd; for (i = 0; i < NR_ITERATIONS; i++) { fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_APPEND, 0666); for (j = 0; j < 16; j++) write(fd, buf, sizeof(buf)); close(fd); } } int main(int argc, char *argv[]) { pthread_t t[NR_THREADS]; int fd; int i; fd = open(TEST_FILE, O_WRONLY|O_CREAT|O_TRUNC, 0666); close(fd); memset(buf, 'a', sizeof(buf)); for (i = 0; i < NR_THREADS; i++) pthread_create(&t[i], NULL, worker, NULL); for (i = 0; i < NR_THREADS; i++) pthread_join(t[i], NULL); return 0; } Before patch: $ mount.cifs //srv/share /mnt/1 -o ... $ mkdir -p /mnt/1/test/dir $ gcc repro.c && ./a.out ... number of opens: 1391 After patch: $ mount.cifs //srv/share /mnt/1 -o ... $ mkdir -p /mnt/1/test/dir $ gcc repro.c && ./a.out ... number of opens: 1 Cc: linux-cifs@vger.kernel.org Cc: David Howells Cc: Jay Shin Cc: Pierguido Lambri Fixes: b8ea3b1ff544 ("smb: enable reuse of deferred file handles for write operations") Acked-by: Shyam Prasad N Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French --- fs/smb/client/file.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index d2df10b8e6fd..9835672267d2 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -999,15 +999,18 @@ int cifs_open(struct inode *inode, struct file *file) rc = cifs_get_readable_path(tcon, full_path, &cfile); } if (rc == 0) { - if (file->f_flags == cfile->f_flags) { + unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC); + + if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) && + (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) { file->private_data = cfile; spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); goto use_cache; - } else { - _cifsFileInfo_put(cfile, true, false); } + _cifsFileInfo_put(cfile, true, false); } else { /* hard link on the defeered close file */ rc = cifs_get_hardlink_path(tcon, inode, file); From 72dd7961a4bb4fa1fc456169a61dd12e68e50645 Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Wed, 11 Jun 2025 16:59:02 +0530 Subject: [PATCH 260/497] smb: improve directory cache reuse for readdir operations Currently, cached directory contents were not reused across subsequent 'ls' operations because the cache validity check relied on comparing the ctx pointer, which changes with each readdir invocation. As a result, the cached dir entries was not marked as valid and the cache was not utilized for subsequent 'ls' operations. This change uses the file pointer, which remains consistent across all readdir calls for a given directory instance, to associate and validate the cache. As a result, cached directory contents can now be correctly reused, improving performance for repeated directory listings. Performance gains with local windows SMB server: Without the patch and default actimeo=1: 1000 directory enumeration operations on dir with 10k files took 135.0s With this patch and actimeo=0: 1000 directory enumeration operations on dir with 10k files took just 5.1s Signed-off-by: Bharath SM Reviewed-by: Shyam Prasad N Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cached_dir.h | 8 ++++---- fs/smb/client/readdir.c | 28 +++++++++++++++------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index 1dfe79d947a6..bc8a812ff95f 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -21,10 +21,10 @@ struct cached_dirent { struct cached_dirents { bool is_valid:1; bool is_failed:1; - struct dir_context *ctx; /* - * Only used to make sure we only take entries - * from a single context. Never dereferenced. - */ + struct file *file; /* + * Used to associate the cache with a single + * open file instance. + */ struct mutex de_mutex; int pos; /* Expected ctx->pos */ struct list_head entries; diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index f9f11cbf89be..ba0193cf9033 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -851,9 +851,9 @@ static bool emit_cached_dirents(struct cached_dirents *cde, } static void update_cached_dirents_count(struct cached_dirents *cde, - struct dir_context *ctx) + struct file *file) { - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -862,9 +862,9 @@ static void update_cached_dirents_count(struct cached_dirents *cde, } static void finished_cached_dirents_count(struct cached_dirents *cde, - struct dir_context *ctx) + struct dir_context *ctx, struct file *file) { - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -877,11 +877,12 @@ static void finished_cached_dirents_count(struct cached_dirents *cde, static void add_cached_dirent(struct cached_dirents *cde, struct dir_context *ctx, const char *name, int namelen, - struct cifs_fattr *fattr) + struct cifs_fattr *fattr, + struct file *file) { struct cached_dirent *de; - if (cde->ctx != ctx) + if (cde->file != file) return; if (cde->is_valid || cde->is_failed) return; @@ -911,7 +912,8 @@ static void add_cached_dirent(struct cached_dirents *cde, static bool cifs_dir_emit(struct dir_context *ctx, const char *name, int namelen, struct cifs_fattr *fattr, - struct cached_fid *cfid) + struct cached_fid *cfid, + struct file *file) { bool rc; ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); @@ -923,7 +925,7 @@ static bool cifs_dir_emit(struct dir_context *ctx, if (cfid) { mutex_lock(&cfid->dirents.de_mutex); add_cached_dirent(&cfid->dirents, ctx, name, namelen, - fattr); + fattr, file); mutex_unlock(&cfid->dirents.de_mutex); } @@ -1023,7 +1025,7 @@ static int cifs_filldir(char *find_entry, struct file *file, cifs_prime_dcache(file_dentry(file), &name, &fattr); return !cifs_dir_emit(ctx, name.name, name.len, - &fattr, cfid); + &fattr, cfid, file); } @@ -1074,8 +1076,8 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) * we need to initialize scanning and storing the * directory content. */ - if (ctx->pos == 0 && cfid->dirents.ctx == NULL) { - cfid->dirents.ctx = ctx; + if (ctx->pos == 0 && cfid->dirents.file == NULL) { + cfid->dirents.file = file; cfid->dirents.pos = 2; } /* @@ -1143,7 +1145,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) } else { if (cfid) { mutex_lock(&cfid->dirents.de_mutex); - finished_cached_dirents_count(&cfid->dirents, ctx); + finished_cached_dirents_count(&cfid->dirents, ctx, file); mutex_unlock(&cfid->dirents.de_mutex); } cifs_dbg(FYI, "Could not find entry\n"); @@ -1184,7 +1186,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx) ctx->pos++; if (cfid) { mutex_lock(&cfid->dirents.de_mutex); - update_cached_dirents_count(&cfid->dirents, ctx); + update_cached_dirents_count(&cfid->dirents, file); mutex_unlock(&cfid->dirents.de_mutex); } From 32ce6b3a83b71d8abf0c0837dc78775f16c9902f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jun 2025 18:08:51 -0400 Subject: [PATCH 261/497] NFSD: Avoid corruption of a referring call list The new code neglects to remove a freshly-allocated RCL from the callback's referring call list when no matching referring call is found. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202505171002.cE46sdj5-lkp@intel.com/ Fixes: 4f3c8d8c9e10 ("NFSD: Implement CB_SEQUENCE referring call lists") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4callback.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ccb00aa93be0..e00b2aea8da2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1409,6 +1409,7 @@ void nfsd41_cb_referring_call(struct nfsd4_callback *cb, out: if (!rcl->__nr_referring_calls) { cb->cb_nr_referring_call_list--; + list_del(&rcl->__list); kfree(rcl); } } From 8b3ac9fabaa825b7bae850ee7b4580c5cba32699 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 9 Jun 2025 13:21:56 -0400 Subject: [PATCH 262/497] SUNRPC: Cleanup/fix initial rq_pages allocation While investigating some reports of memory-constrained NUMA machines failing to mount v3 and v4.0 nfs mounts, we found that svc_init_buffer() was not attempting to retry allocations from the bulk page allocator. Typically, this results in a single page allocation being returned and the mount attempt fails with -ENOMEM. A retry would have allowed the mount to succeed. Additionally, it seems that the bulk allocation in svc_init_buffer() is redundant because svc_alloc_arg() will perform the required allocation and does the correct thing to retry the allocations. The call to allocate memory in svc_alloc_arg() drops the preferred node argument, but I expect we'll still allocate on the preferred node because the allocation call happens within the svc thread context, which chooses the node with memory closest to the current thread's execution. This patch cleans out the bulk allocation in svc_init_buffer() to allow svc_alloc_arg() to handle the allocation/retry logic for rq_pages. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Fixes: ed603bcf4fea ("sunrpc: Replace the rq_pages array with dynamically-allocated memory") Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 939b6239df8a..ef8a05aac87f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -638,8 +638,6 @@ EXPORT_SYMBOL_GPL(svc_destroy); static bool svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) { - unsigned long ret; - rqstp->rq_maxpages = svc_serv_maxpages(serv); /* rq_pages' last entry is NULL for historical reasons. */ @@ -649,9 +647,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) if (!rqstp->rq_pages) return false; - ret = alloc_pages_bulk_node(GFP_KERNEL, node, rqstp->rq_maxpages, - rqstp->rq_pages); - return ret == rqstp->rq_maxpages; + return true; } /* From 5466491c9e3309ed5c7adbb8fad6e93fcc9a8fe9 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 9 Jun 2025 14:28:27 -0700 Subject: [PATCH 263/497] ionic: Prevent driver/fw getting out of sync on devcmd(s) Some stress/negative firmware testing around devcmd(s) returning EAGAIN found that the done bit could get out of sync in the firmware when it wasn't cleared in a retry case. While here, change the type of the local done variable to a bool to match the return type from ionic_dev_cmd_done(). Fixes: ec8ee714736e ("ionic: stretch heartbeat detection") Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250609212827.53842-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index daf1e82cb76b..0e60a6bef99a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -516,9 +516,9 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, unsigned long start_time; unsigned long max_wait; unsigned long duration; - int done = 0; bool fw_up; int opcode; + bool done; int err; /* Wait for dev cmd to complete, retrying if we get EAGAIN, @@ -526,6 +526,7 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds, */ max_wait = jiffies + (max_seconds * HZ); try_again: + done = false; opcode = idev->opcode; start_time = jiffies; for (fw_up = ionic_is_fw_running(idev); From bb666b7c27073b986b75699e51a7102910f58060 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 9 Jun 2025 17:57:49 +0100 Subject: [PATCH 264/497] mm: add mmap_prepare() compatibility layer for nested file systems Nested file systems, that is those which invoke call_mmap() within their own f_op->mmap() handlers, may encounter underlying file systems which provide the f_op->mmap_prepare() hook introduced by commit c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). We have a chicken-and-egg scenario here - until all file systems are converted to using .mmap_prepare(), we cannot convert these nested handlers, as we can't call f_op->mmap from an .mmap_prepare() hook. So we have to do it the other way round - invoke the .mmap_prepare() hook from an .mmap() one. in order to do so, we need to convert VMA state into a struct vm_area_desc descriptor, invoking the underlying file system's f_op->mmap_prepare() callback passing a pointer to this, and then setting VMA state accordingly and safely. This patch achieves this via the compat_vma_mmap_prepare() function, which we invoke from call_mmap() if f_op->mmap_prepare() is specified in the passed in file pointer. We place the fundamental logic into mm/vma.h where VMA manipulation belongs. We also update the VMA userland tests to accommodate the changes. The compat_vma_mmap_prepare() function and its associated machinery is temporary, and will be removed once the conversion of file systems is complete. We carefully place this code so it can be used with CONFIG_MMU and also with cutting edge nommu silicon. [akpm@linux-foundation.org: export compat_vma_mmap_prepare tp fix build] [lorenzo.stoakes@oracle.com: remove unused declarations] Link: https://lkml.kernel.org/r/ac3ae324-4c65-432a-8c6d-2af988b18ac8@lucifer.local Link: https://lkml.kernel.org/r/20250609165749.344976-1-lorenzo.stoakes@oracle.com Fixes: c84bf6dd2b83 ("mm: introduce new .mmap_prepare() file callback"). Signed-off-by: Lorenzo Stoakes Reported-by: Jann Horn Closes: https://lore.kernel.org/linux-mm/CAG48ez04yOEVx1ekzOChARDDBZzAKwet8PEoPM4Ln3_rk91AzQ@mail.gmail.com/ Reviewed-by: Pedro Falcato Reviewed-by: Vlastimil Babka Cc: Al Viro Cc: Christian Brauner Cc: Jan Kara Cc: Jann Horn Cc: Liam Howlett Signed-off-by: Andrew Morton --- include/linux/fs.h | 6 ++-- mm/util.c | 40 +++++++++++++++++++++++++++ mm/vma.c | 1 - mm/vma.h | 47 ++++++++++++++++++++++++++++++++ tools/testing/vma/vma_internal.h | 16 +++++++++++ 5 files changed, 107 insertions(+), 3 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 96c7925a6551..4ec77da65f14 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2274,10 +2274,12 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) return true; } +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma); + static inline int call_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } diff --git a/mm/util.c b/mm/util.c index 448117da071f..0b270c43d7d1 100644 --- a/mm/util.c +++ b/mm/util.c @@ -1131,3 +1131,43 @@ void flush_dcache_folio(struct folio *folio) } EXPORT_SYMBOL(flush_dcache_folio); #endif + +/** + * compat_vma_mmap_prepare() - Apply the file's .mmap_prepare() hook to an + * existing VMA + * @file: The file which possesss an f_op->mmap_prepare() hook + * @vma: The VMA to apply the .mmap_prepare() hook to. + * + * Ordinarily, .mmap_prepare() is invoked directly upon mmap(). However, certain + * 'wrapper' file systems invoke a nested mmap hook of an underlying file. + * + * Until all filesystems are converted to use .mmap_prepare(), we must be + * conservative and continue to invoke these 'wrapper' filesystems using the + * deprecated .mmap() hook. + * + * However we have a problem if the underlying file system possesses an + * .mmap_prepare() hook, as we are in a different context when we invoke the + * .mmap() hook, already having a VMA to deal with. + * + * compat_vma_mmap_prepare() is a compatibility function that takes VMA state, + * establishes a struct vm_area_desc descriptor, passes to the underlying + * .mmap_prepare() hook and applies any changes performed by it. + * + * Once the conversion of filesystems is complete this function will no longer + * be required and will be removed. + * + * Returns: 0 on success or error. + */ +int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} +EXPORT_SYMBOL(compat_vma_mmap_prepare); diff --git a/mm/vma.c b/mm/vma.c index 0fb9b2c7b734..fef67a66a095 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -3113,7 +3113,6 @@ int __vm_munmap(unsigned long start, size_t len, bool unlock) return ret; } - /* Insert vm structure into process list sorted by address * and into the inode's i_mmap tree. If vm_file is non-NULL * then i_mmap_rwsem is taken here. diff --git a/mm/vma.h b/mm/vma.h index 0db066e7a45d..f47112a352db 100644 --- a/mm/vma.h +++ b/mm/vma.h @@ -222,6 +222,53 @@ static inline int vma_iter_store_gfp(struct vma_iterator *vmi, return 0; } + +/* + * Temporary helper functions for file systems which wrap an invocation of + * f_op->mmap() but which might have an underlying file system which implements + * f_op->mmap_prepare(). + */ + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + desc->mm = vma->vm_mm; + desc->start = vma->vm_start; + desc->end = vma->vm_end; + + desc->pgoff = vma->vm_pgoff; + desc->file = vma->vm_file; + desc->vm_flags = vma->vm_flags; + desc->page_prot = vma->vm_page_prot; + + desc->vm_ops = NULL; + desc->private_data = NULL; + + return desc; +} + +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc) +{ + /* + * Since we're invoking .mmap_prepare() despite having a partially + * established VMA, we must take care to handle setting fields + * correctly. + */ + + /* Mutable fields. Populated with initial state. */ + vma->vm_pgoff = desc->pgoff; + if (vma->vm_file != desc->file) + vma_set_file(vma, desc->file); + if (vma->vm_flags != desc->vm_flags) + vm_flags_set(vma, desc->vm_flags); + vma->vm_page_prot = desc->page_prot; + + /* User-defined fields. */ + vma->vm_ops = desc->vm_ops; + vma->vm_private_data = desc->private_data; +} + int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 4505b1c31be1..14718ca23a05 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t; #define ASSERT_EXCLUSIVE_WRITER(x) +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + struct kref { refcount_t refcount; }; @@ -1468,4 +1476,12 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) (void)vma; } +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} + #endif /* __MM_VMA_INTERNAL_H */ From d742f3ec1cb91c4dd86b981f55cd291e07f03094 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 12 Jun 2025 10:42:55 +0200 Subject: [PATCH 265/497] drm/ast: Do not include Fix the compile-time warning drivers/gpu/drm/ast/ast_mode.c: warning: EXPORT_SYMBOL() is not used, but #include is present Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20250612084257.200907-1-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_mode.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 1de832964e92..031980d8f3ab 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -29,7 +29,6 @@ */ #include -#include #include #include From 2e3395ab2a358ba8d1c80d8df35dcfb882cfc28e Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 12 Jun 2025 10:53:02 +0200 Subject: [PATCH 266/497] drm/mgag200: Do not include Fix the compile-time warning drivers/gpu/drm/mgag200/mgag200_ddc.c: warning: EXPORT_SYMBOL() is not used, but #include is present Signed-off-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20250612085308.203861-1-tzimmermann@suse.de --- drivers/gpu/drm/mgag200/mgag200_ddc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mgag200/mgag200_ddc.c b/drivers/gpu/drm/mgag200/mgag200_ddc.c index 6d81ea8931e8..c31673eaa554 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ddc.c +++ b/drivers/gpu/drm/mgag200/mgag200_ddc.c @@ -26,7 +26,6 @@ * Authors: Dave Airlie */ -#include #include #include #include From b93755f408325170edb2156c6a894ed1cae5f4f6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 12 May 2025 20:14:55 +0200 Subject: [PATCH 267/497] powerpc/vdso: Fix build of VDSO32 with pcrel Building vdso32 on power10 with pcrel leads to following errors: VDSO32A arch/powerpc/kernel/vdso/gettimeofday-32.o arch/powerpc/kernel/vdso/gettimeofday.S: Assembler messages: arch/powerpc/kernel/vdso/gettimeofday.S:40: Error: syntax error; found `@', expected `,' arch/powerpc/kernel/vdso/gettimeofday.S:71: Info: macro invoked from here arch/powerpc/kernel/vdso/gettimeofday.S:40: Error: junk at end of line: `@notoc' arch/powerpc/kernel/vdso/gettimeofday.S:71: Info: macro invoked from here ... make[2]: *** [arch/powerpc/kernel/vdso/Makefile:85: arch/powerpc/kernel/vdso/gettimeofday-32.o] Error 1 make[1]: *** [arch/powerpc/Makefile:388: vdso_prepare] Error 2 Once the above is fixed, the following happens: VDSO32C arch/powerpc/kernel/vdso/vgettimeofday-32.o cc1: error: '-mpcrel' requires '-mcmodel=medium' make[2]: *** [arch/powerpc/kernel/vdso/Makefile:89: arch/powerpc/kernel/vdso/vgettimeofday-32.o] Error 1 make[1]: *** [arch/powerpc/Makefile:388: vdso_prepare] Error 2 make: *** [Makefile:251: __sub-make] Error 2 Make sure pcrel version of CFUNC() macro is used only for powerpc64 builds and remove -mpcrel for powerpc32 builds. Fixes: 7e3a68be42e1 ("powerpc/64: vmlinux support building with PCREL addresing") Signed-off-by: Christophe Leroy Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/1fa3453f07d42a50a70114da9905bf7b73304fca.1747073669.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ppc_asm.h | 2 +- arch/powerpc/kernel/vdso/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 02897f4b0dbf..b891910fce8a 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -183,7 +183,7 @@ /* * Used to name C functions called from asm */ -#ifdef CONFIG_PPC_KERNEL_PCREL +#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL) #define CFUNC(name) name@notoc #else #define CFUNC(name) name diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index e8824f933326..8834dfe9d727 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -53,7 +53,7 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WAR ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) CC32FLAGS := -m32 -CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel ifdef CONFIG_CC_IS_CLANG # This flag is supported by clang for 64-bit but not 32-bit so it will cause # an unused command line flag warning for this file. From 33bc69cf6655cf60829a803a45275f11a74899e5 Mon Sep 17 00:00:00 2001 From: Narayana Murty N Date: Thu, 8 May 2025 02:29:28 -0400 Subject: [PATCH 268/497] powerpc/eeh: Fix missing PE bridge reconfiguration during VFIO EEH recovery VFIO EEH recovery for PCI passthrough devices fails on PowerNV and pseries platforms due to missing host-side PE bridge reconfiguration. In the current implementation, eeh_pe_configure() only performs RTAS or OPAL-based bridge reconfiguration for native host devices, but skips it entirely for PEs managed through VFIO in guest passthrough scenarios. This leads to incomplete EEH recovery when a PCI error affects a passthrough device assigned to a QEMU/KVM guest. Although VFIO triggers the EEH recovery flow through VFIO_EEH_PE_ENABLE ioctl, the platform-specific bridge reconfiguration step is silently bypassed. As a result, the PE's config space is not fully restored, causing subsequent config space access failures or EEH freeze-on-access errors inside the guest. This patch fixes the issue by ensuring that eeh_pe_configure() always invokes the platform's configure_bridge() callback (e.g., pseries_eeh_phb_configure_bridge) even for VFIO-managed PEs. This ensures that RTAS or OPAL calls to reconfigure the PE bridge are correctly issued on the host side, restoring the PE's configuration space after an EEH event. This fix is essential for reliable EEH recovery in QEMU/KVM guests using VFIO PCI passthrough on PowerNV and pseries systems. Tested with: - QEMU/KVM guest using VFIO passthrough (IBM Power9,(lpar)Power11 host) - Injected EEH errors with pseries EEH errinjct tool on host, recovery verified on qemu guest. - Verified successful config space access and CAP_EXP DevCtl restoration after recovery Fixes: 212d16cdca2d ("powerpc/eeh: EEH support for VFIO PCI device") Signed-off-by: Narayana Murty N Reviewed-by: Vaibhav Jain Reviewed-by: Ganesh Goudar Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250508062928.146043-1-nnmlinux@linux.ibm.com --- arch/powerpc/kernel/eeh.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 83fe99861eb1..ca7f7bb2b478 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1509,6 +1509,8 @@ int eeh_pe_configure(struct eeh_pe *pe) /* Invalid PE ? */ if (!pe) return -ENODEV; + else + ret = eeh_ops->configure_bridge(pe); return ret; } From 4e6d080acfda5344ccbc63afe778830e22be4be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= Date: Wed, 11 Jun 2025 23:07:49 +0200 Subject: [PATCH 269/497] powerpc/microwatt: Fix model property in device tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The standard property for the model name is called "model". Signed-off-by: J. Neuschäfer Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250611-microwatt-v2-1-80847bbc5f9c@posteo.net --- arch/powerpc/boot/dts/microwatt.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts index c4e4d2a9b460..b7eac4e56019 100644 --- a/arch/powerpc/boot/dts/microwatt.dts +++ b/arch/powerpc/boot/dts/microwatt.dts @@ -4,7 +4,7 @@ / { #size-cells = <0x02>; #address-cells = <0x02>; - model-name = "microwatt"; + model = "microwatt"; compatible = "microwatt-soc"; aliases { From e75cb6010838f61b9d63e921d1763a8ab177e38e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=2E=20Neusch=C3=A4fer?= Date: Wed, 11 Jun 2025 21:01:01 +0200 Subject: [PATCH 270/497] powerpc: dts: mpc8315erdb: Add GPIO controller node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MPC8315E SoC and variants have a GPIO controller at IMMR + 0xc00. This node was previously missing from the device tree. Signed-off-by: J. Neuschäfer Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250611-mpc-gpio-v1-1-02d1f75336e2@posteo.net --- arch/powerpc/boot/dts/mpc8315erdb.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index e09b37d7489d..a89cb3139ca8 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -6,6 +6,7 @@ */ /dts-v1/; +#include / { compatible = "fsl,mpc8315erdb"; @@ -358,6 +359,15 @@ pmc: power@b00 { interrupt-parent = <&ipic>; fsl,mpc8313-wakeup-timer = <>m1>; }; + + gpio: gpio-controller@c00 { + compatible = "fsl,mpc8314-gpio"; + reg = <0xc00 0x100>; + interrupts = <74 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&ipic>; + gpio-controller; + #gpio-cells = <2>; + }; }; pci0: pci@e0008500 { From b0823d5fbacb1c551d793cbfe7af24e0d1fa45ed Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 12 Jun 2025 07:38:18 -0700 Subject: [PATCH 271/497] perf/x86/intel: Fix crash in icl_update_topdown_event() The perf_fuzzer found a hard-lockup crash on a RaptorLake machine: Oops: general protection fault, maybe for address 0xffff89aeceab400: 0000 CPU: 23 UID: 0 PID: 0 Comm: swapper/23 Tainted: [W]=WARN Hardware name: Dell Inc. Precision 9660/0VJ762 RIP: 0010:native_read_pmc+0x7/0x40 Code: cc e8 8d a9 01 00 48 89 03 5b cd cc cc cc cc 0f 1f ... RSP: 000:fffb03100273de8 EFLAGS: 00010046 .... Call Trace: icl_update_topdown_event+0x165/0x190 ? ktime_get+0x38/0xd0 intel_pmu_read_event+0xf9/0x210 __perf_event_read+0xf9/0x210 CPUs 16-23 are E-core CPUs that don't support the perf metrics feature. The icl_update_topdown_event() should not be invoked on these CPUs. It's a regression of commit: f9bdf1f95339 ("perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample read") The bug introduced by that commit is that the is_topdown_event() function is mistakenly used to replace the is_topdown_count() call to check if the topdown functions for the perf metrics feature should be invoked. Fix it. Fixes: f9bdf1f95339 ("perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample read") Closes: https://lore.kernel.org/lkml/352f0709-f026-cd45-e60c-60dfd97f73f3@maine.edu/ Reported-by: Vince Weaver Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Vince Weaver Cc: stable@vger.kernel.org # v6.15+ Link: https://lore.kernel.org/r/20250612143818.2889040-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 741b229f0718..c2fb729c270e 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event) * If the PEBS counters snapshotting is enabled, * the topdown event is available in PEBS records. */ - if (is_topdown_event(event) && !is_pebs_counter_event_group(event)) + if (is_topdown_count(event) && !is_pebs_counter_event_group(event)) static_call(intel_pmu_update_topdown_event)(event, NULL); else intel_pmu_drain_pebs_buffer(); From ac90aad0e9bf7c37e706fdc08ce763a553890bdf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 12 Jun 2025 10:47:09 -0700 Subject: [PATCH 272/497] crypto: testmgr - reinstate kconfig control over full self-tests Commit 698de822780f ("crypto: testmgr - make it easier to enable the full set of tests") removed support for building kernels that run only the "fast" set of crypto self-tests by default. This assumed that nearly everyone actually wanted the full set of tests, *if* they had already chosen to enable the tests at all. Unfortunately, it turns out that both Debian and Fedora intentionally have the crypto self-tests enabled in their production kernels. And for production kernels we do need to keep the testing time down, which implies just running the "fast" tests, not the full set of tests. For Fedora, a reason for enabling the tests in production is that they are being (mis)used to meet the FIPS 140-3 pre-operational testing requirement. However, the other reason for enabling the tests in production, which applies to both distros, is that they provide some value in protecting users from buggy drivers. Unfortunately, the crypto/ subsystem has many buggy and untested drivers for off-CPU hardware accelerators on rare platforms. These broken drivers get shipped to users, and there have been multiple examples of the tests preventing these buggy drivers from being used. So effectively, the tests are being relied on in production kernels. I think this is kind of crazy (untested drivers should just not be enabled at all), but that seems to be how things work currently. Thus, reintroduce a kconfig option that controls the level of testing. Call it CRYPTO_SELFTESTS_FULL instead of the original name CRYPTO_MANAGER_EXTRA_TESTS, which was slightly misleading. Moreover, given the "production kernel" use case, make CRYPTO_SELFTESTS depend on EXPERT instead of DEBUG_KERNEL. I also haven't reinstated all the #ifdefs in crypto/testmgr.c. Instead, just rely on the compiler to optimize out unused code. Fixes: 40b9969796bf ("crypto: testmgr - replace CRYPTO_MANAGER_DISABLE_TESTS with CRYPTO_SELFTESTS") Fixes: 698de822780f ("crypto: testmgr - make it easier to enable the full set of tests") Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/Kconfig | 25 +++++++++++++++++++++---- crypto/testmgr.c | 15 ++++++++++++--- include/crypto/internal/simd.h | 6 ++++-- lib/crypto/Makefile | 2 +- 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index e9fee7818e27..e1cfd0d4cc8f 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -176,16 +176,33 @@ config CRYPTO_USER config CRYPTO_SELFTESTS bool "Enable cryptographic self-tests" - depends on DEBUG_KERNEL + depends on EXPERT help Enable the cryptographic self-tests. The cryptographic self-tests run at boot time, or at algorithm registration time if algorithms are dynamically loaded later. - This is primarily intended for developer use. It should not be - enabled in production kernels, unless you are trying to use these - tests to fulfill a FIPS testing requirement. + There are two main use cases for these tests: + + - Development and pre-release testing. In this case, also enable + CRYPTO_SELFTESTS_FULL to get the full set of tests. All crypto code + in the kernel is expected to pass the full set of tests. + + - Production kernels, to help prevent buggy drivers from being used + and/or meet FIPS 140-3 pre-operational testing requirements. In + this case, enable CRYPTO_SELFTESTS but not CRYPTO_SELFTESTS_FULL. + +config CRYPTO_SELFTESTS_FULL + bool "Enable the full set of cryptographic self-tests" + depends on CRYPTO_SELFTESTS + help + Enable the full set of cryptographic self-tests for each algorithm. + + The full set of tests should be enabled for development and + pre-release testing, but not in production kernels. + + All crypto code in the kernel is expected to pass the full tests. config CRYPTO_NULL tristate "Null algorithms" diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 72005074a5c2..32f753d6c430 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -45,6 +45,7 @@ static bool notests; module_param(notests, bool, 0644); MODULE_PARM_DESC(notests, "disable all crypto self-tests"); +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL static bool noslowtests; module_param(noslowtests, bool, 0644); MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests"); @@ -52,6 +53,10 @@ MODULE_PARM_DESC(noslowtests, "disable slow crypto self-tests"); static unsigned int fuzz_iterations = 100; module_param(fuzz_iterations, uint, 0644); MODULE_PARM_DESC(fuzz_iterations, "number of fuzz test iterations"); +#else +#define noslowtests 1 +#define fuzz_iterations 0 +#endif #ifndef CONFIG_CRYPTO_SELFTESTS @@ -319,9 +324,9 @@ struct testvec_config { /* * The following are the lists of testvec_configs to test for each algorithm - * type when the fast crypto self-tests are enabled. They aim to provide good - * test coverage, while keeping the test time much shorter than the full tests - * so that the fast tests can be used to fulfill FIPS 140 testing requirements. + * type when the "fast" crypto self-tests are enabled. They aim to provide good + * test coverage, while keeping the test time much shorter than the "full" tests + * so that the "fast" tests can be enabled in a wider range of circumstances. */ /* Configs for skciphers and aeads */ @@ -1183,14 +1188,18 @@ static void generate_random_testvec_config(struct rnd_state *rng, static void crypto_disable_simd_for_test(void) { +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL migrate_disable(); __this_cpu_write(crypto_simd_disabled_for_test, true); +#endif } static void crypto_reenable_simd_for_test(void) { +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL __this_cpu_write(crypto_simd_disabled_for_test, false); migrate_enable(); +#endif } /* diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h index 7e7f1ac3b7fd..9e338e7aafbd 100644 --- a/include/crypto/internal/simd.h +++ b/include/crypto/internal/simd.h @@ -44,9 +44,11 @@ void simd_unregister_aeads(struct aead_alg *algs, int count, * * This delegates to may_use_simd(), except that this also returns false if SIMD * in crypto code has been temporarily disabled on this CPU by the crypto - * self-tests, in order to test the no-SIMD fallback code. + * self-tests, in order to test the no-SIMD fallback code. This override is + * currently limited to configurations where the "full" self-tests are enabled, + * because it might be a bit too invasive to be part of the "fast" self-tests. */ -#ifdef CONFIG_CRYPTO_SELFTESTS +#ifdef CONFIG_CRYPTO_SELFTESTS_FULL DECLARE_PER_CPU(bool, crypto_simd_disabled_for_test); #define crypto_simd_usable() \ (may_use_simd() && !this_cpu_read(crypto_simd_disabled_for_test)) diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 3e79283b617d..f9e44aac6619 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -62,7 +62,7 @@ libsha256-generic-y := sha256-generic.o obj-$(CONFIG_MPILIB) += mpi/ -obj-$(CONFIG_CRYPTO_SELFTESTS) += simd.o +obj-$(CONFIG_CRYPTO_SELFTESTS_FULL) += simd.o obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o libsm3-y := sm3.o From 9f0ad43b158d07bc7144d219ceabdea36e28e392 Mon Sep 17 00:00:00 2001 From: Thangaraj Samynathan Date: Thu, 12 Jun 2025 08:00:59 +0530 Subject: [PATCH 273/497] spi: spi-pci1xxxx: Drop MSI-X usage as unsupported by DMA engine Removes MSI-X from the interrupt request path, as the DMA engine used by the SPI controller does not support MSI-X interrupts. Signed-off-by: Thangaraj Samynathan Link: https://patch.msgid.link/20250612023059.71726-1-thangaraj.s@microchip.com Signed-off-by: Mark Brown --- drivers/spi/spi-pci1xxxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pci1xxxx.c b/drivers/spi/spi-pci1xxxx.c index 9112d8a1a0c8..e27642c4dea4 100644 --- a/drivers/spi/spi-pci1xxxx.c +++ b/drivers/spi/spi-pci1xxxx.c @@ -762,7 +762,7 @@ static int pci1xxxx_spi_probe(struct pci_dev *pdev, const struct pci_device_id * return -EINVAL; num_vector = pci_alloc_irq_vectors(pdev, 1, hw_inst_cnt, - PCI_IRQ_ALL_TYPES); + PCI_IRQ_INTX | PCI_IRQ_MSI); if (num_vector < 0) { dev_err(&pdev->dev, "Error allocating MSI vectors\n"); return num_vector; From 9ba75ccad85708c5a484637dccc1fc59295b0a83 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 11 Jun 2025 15:33:40 -0500 Subject: [PATCH 274/497] platform/x86/amd/pmc: Add PCSpecialist Lafite Pro V 14M to 8042 quirks list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every other s2idle cycle fails to reach hardware sleep when keyboard wakeup is enabled. This appears to be an EC bug, but the vendor refuses to fix it. It was confirmed that turning off i8042 wakeup avoids ths issue (albeit keyboard wakeup is disabled). Take the lesser of two evils and add it to the i8042 quirk list. Reported-by: Raoul Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220116 Tested-by: Raoul Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20250611203341.3733478-1-superm1@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/amd/pmc/pmc-quirks.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/amd/pmc/pmc-quirks.c b/drivers/platform/x86/amd/pmc/pmc-quirks.c index 5c7c01f66cde..f292111bd065 100644 --- a/drivers/platform/x86/amd/pmc/pmc-quirks.c +++ b/drivers/platform/x86/amd/pmc/pmc-quirks.c @@ -225,6 +225,15 @@ static const struct dmi_system_id fwbug_list[] = { DMI_MATCH(DMI_BOARD_NAME, "WUJIE14-GX4HRXL"), } }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=220116 */ + { + .ident = "PCSpecialist Lafite Pro V 14M", + .driver_data = &quirk_spurious_8042, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"), + DMI_MATCH(DMI_PRODUCT_NAME, "Lafite Pro V 14M"), + } + }, {} }; From e2468dc700743683e1d1793bbd855e2536fd3de2 Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Wed, 11 Jun 2025 18:30:40 -0300 Subject: [PATCH 275/497] Revert "platform/x86: alienware-wmi-wmax: Add G-Mode support to Alienware m16 R1" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5ff79cabb23a2f14d2ed29e9596aec908905a0e6. Although the Alienware m16 R1 AMD model supports G-Mode, it actually has a lower power ceiling than plain "performance" profile, which results in lower performance. Reported-by: Cihan Ozakca Cc: stable@vger.kernel.org # 6.15.x Signed-off-by: Kurt Borja Link: https://lore.kernel.org/r/20250611-m16-rev-v1-1-72d13bad03c9@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index c42f9228b0b2..20ec122a9fe0 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -119,7 +119,7 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"), }, - .driver_data = &g_series_quirks, + .driver_data = &generic_quirks, }, { .ident = "Alienware m16 R2", From f826ec7966a63d48e16e0868af4e038bf9a1a3ae Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:41:25 +0100 Subject: [PATCH 276/497] bio: Fix bio_first_folio() for SPARSEMEM without VMEMMAP It is possible for physically contiguous folios to have discontiguous struct pages if SPARSEMEM is enabled and SPARSEMEM_VMEMMAP is not. This is correctly handled by folio_page_idx(), so remove this open-coded implementation. Fixes: 640d1930bef4 (block: Add bio_for_each_folio_all()) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144126.2849931-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/bio.h b/include/linux/bio.h index 9c37c66ef9ca..46ffac5caab7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -291,7 +291,7 @@ static inline void bio_first_folio(struct folio_iter *fi, struct bio *bio, fi->folio = page_folio(bvec->bv_page); fi->offset = bvec->bv_offset + - PAGE_SIZE * (bvec->bv_page - &fi->folio->page); + PAGE_SIZE * folio_page_idx(fi->folio, bvec->bv_page); fi->_seg_count = bvec->bv_len; fi->length = min(folio_size(fi->folio) - fi->offset, fi->_seg_count); fi->_next = folio_next(fi->folio); From 5e223e06ee7c6d8f630041a0645ac90e39a42cc6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 12 Jun 2025 15:42:53 +0100 Subject: [PATCH 277/497] block: Fix bvec_set_folio() for very large folios Similarly to 26064d3e2b4d ("block: fix adding folio to bio"), if we attempt to add a folio that is larger than 4GB, we'll silently truncate the offset and len. Widen the parameters to size_t, assert that the length is less than 4GB and set the first page that contains the interesting data rather than the first page of the folio. Fixes: 26db5ee15851 (block: add a bvec_set_folio helper) Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20250612144255.2850278-1-willy@infradead.org Signed-off-by: Jens Axboe --- include/linux/bvec.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 204b22a99c4b..0a80e1f9aa20 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -57,9 +57,12 @@ static inline void bvec_set_page(struct bio_vec *bv, struct page *page, * @offset: offset into the folio */ static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio, - unsigned int len, unsigned int offset) + size_t len, size_t offset) { - bvec_set_page(bv, &folio->page, len, offset); + unsigned long nr = offset / PAGE_SIZE; + + WARN_ON_ONCE(len > UINT_MAX); + bvec_set_page(bv, folio_page(folio, nr), len, offset % PAGE_SIZE); } /** From b5acc3628898baa63658bc4125f9525f9b3dd4f3 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 12 Jun 2025 16:17:51 +0200 Subject: [PATCH 278/497] ata: ahci: Disallow LPM for ASUSPRO-D840SA motherboard A user has bisected a regression which causes graphical corruptions on his screen to commit 7627a0edef54 ("ata: ahci: Drop low power policy board type"). Simply reverting commit 7627a0edef54 ("ata: ahci: Drop low power policy board type") makes the graphical corruptions on his screen to go away. (Note: there are no visible messages in dmesg that indicates a problem with AHCI.) The user also reports that the problem occurs regardless if there is an HDD or an SSD connected via AHCI, so the problem is not device related. The devices also work fine on other motherboards, so it seems specific to the ASUSPRO-D840SA motherboard. While enabling low power modes for AHCI is not supposed to affect completely unrelated hardware, like a graphics card, it does however allow the system to enter deeper PC-states, which could expose ACPI issues that were previously not visible (because the system never entered these lower power states before). There are previous examples where enabling LPM exposed serious BIOS/ACPI bugs, see e.g. commit 240630e61870 ("ahci: Disable LPM on Lenovo 50 series laptops with a too old BIOS"). Since there hasn't been any BIOS update in years for the ASUSPRO-D840SA motherboard, disable LPM for this board, in order to avoid entering lower PC-states, which triggers graphical corruptions. Cc: stable@vger.kernel.org Reported-by: Andy Yang Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220111 Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type") Reviewed-by: Damien Le Moal Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20250612141750.2108342-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index e7c8357cbc54..c8ad8ace7496 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1410,8 +1410,15 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) static bool ahci_broken_lpm(struct pci_dev *pdev) { + /* + * Platforms with LPM problems. + * If driver_data is NULL, there is no existing BIOS version with + * functioning LPM. + * If driver_data is non-NULL, then driver_data contains the DMI BIOS + * build date of the first BIOS version with functioning LPM (i.e. older + * BIOS versions have broken LPM). + */ static const struct dmi_system_id sysids[] = { - /* Various Lenovo 50 series have LPM issues with older BIOSen */ { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), @@ -1440,6 +1447,13 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) }, .driver_data = "20180409", /* 2.35 */ }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_VERSION, "ASUSPRO D840MB_M840SA"), + }, + /* 320 is broken, there is no known good version yet. */ + }, { } /* terminate list */ }; const struct dmi_system_id *dmi = dmi_first_match(sysids); @@ -1449,6 +1463,9 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) if (!dmi) return false; + if (!dmi->driver_data) + return true; + dmi_get_date(DMI_BIOS_DATE, &year, &month, &date); snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date); From 2b32fc8ff08deac3aa509f321a28e21b1eea5525 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 12 Jun 2025 11:32:51 -0700 Subject: [PATCH 279/497] genirq/cpuhotplug: Rebalance managed interrupts across multi-CPU hotplug Commit 788019eb559f ("genirq: Retain disable depth for managed interrupts across CPU hotplug") intended to only decrement the disable depth once per managed shutdown, but instead it decrements for each CPU hotplug in the affinity mask, until its depth reaches a point where it finally gets re-started. For example, consider: 1. Interrupt is affine to CPU {M,N} 2. disable_irq() -> depth is 1 3. CPU M goes offline -> interrupt migrates to CPU N / depth is still 1 4. CPU N goes offline -> irq_shutdown() / depth is 2 5. CPU N goes online -> irq_restore_affinity_of_irq() -> irqd_is_managed_and_shutdown()==true -> irq_startup_managed() -> depth is 1 6. CPU M goes online -> irq_restore_affinity_of_irq() -> irqd_is_managed_and_shutdown()==true -> irq_startup_managed() -> depth is 0 *** BUG: driver expects the interrupt is still disabled *** -> irq_startup() -> irqd_clr_managed_shutdown() 7. enable_irq() -> depth underflow / unbalanced enable_irq() warning This should clear the managed-shutdown flag at step 6, so that further hotplugs don't cause further imbalance. Note: It might be cleaner to also remove the irqd_clr_managed_shutdown() invocation from __irq_startup_managed(). But this is currently not possible because of irq_update_affinity_desc() as it sets IRQD_MANAGED_SHUTDOWN and expects irq_startup() to clear it. Fixes: 788019eb559f ("genirq: Retain disable depth for managed interrupts across CPU hotplug") Reported-by: Aleksandrs Vinarskis Signed-off-by: Brian Norris Signed-off-by: Thomas Gleixner Tested-by: Aleksandrs Vinarskis Link: https://lore.kernel.org/all/20250612183303.3433234-2-briannorris@chromium.org --- kernel/irq/chip.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b0e0a7332993..2b274007e8ba 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -205,6 +205,14 @@ __irq_startup_managed(struct irq_desc *desc, const struct cpumask *aff, void irq_startup_managed(struct irq_desc *desc) { + struct irq_data *d = irq_desc_get_irq_data(desc); + + /* + * Clear managed-shutdown flag, so we don't repeat managed-startup for + * multiple hotplugs, and cause imbalanced disable depth. + */ + irqd_clr_managed_shutdown(d); + /* * Only start it up when the disable depth is 1, so that a disable, * hotunplug, hotplug sequence does not end up enabling it during From 72218d74c9c57b8ea36c2a58875dff406fc10462 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 12 Jun 2025 11:32:52 -0700 Subject: [PATCH 280/497] genirq/cpuhotplug: Restore affinity even for suspended IRQ Commit 788019eb559f ("genirq: Retain disable depth for managed interrupts across CPU hotplug") tried to make managed shutdown/startup properly reference counted, but it missed the fact that the unplug and hotplug code has an intentional imbalance by skipping IRQS_SUSPENDED interrupts on the "restore" path. This means that if a managed-affinity interrupt was both suspended and managed-shutdown (such as may happen during system suspend / S3), resume skips calling irq_startup_managed(), and would again have an unbalanced depth this time, with a positive value (i.e., remaining unexpectedly masked). This IRQS_SUSPENDED check was introduced in commit a60dd06af674 ("genirq/cpuhotplug: Skip suspended interrupts when restoring affinity") for essentially the same reason as commit 788019eb559f, to prevent that irq_startup() would unconditionally re-enable an interrupt too early. Because irq_startup_managed() now respsects the disable-depth count, the IRQS_SUSPENDED check is not longer needed, and instead, it causes harm. Thus, drop the IRQS_SUSPENDED check, and restore balance. This effectively reverts commit a60dd06af674 ("genirq/cpuhotplug: Skip suspended interrupts when restoring affinity"), because it is replaced by commit 788019eb559f ("genirq: Retain disable depth for managed interrupts across CPU hotplug"). Fixes: 788019eb559f ("genirq: Retain disable depth for managed interrupts across CPU hotplug") Reported-by: Aleksandrs Vinarskis Signed-off-by: Brian Norris Signed-off-by: Thomas Gleixner Tested-by: Aleksandrs Vinarskis Link: https://lore.kernel.org/all/20250612183303.3433234-3-briannorris@chromium.org Closes: https://lore.kernel.org/lkml/24ec4adc-7c80-49e9-93ee-19908a97ab84@gmail.com/ --- kernel/irq/cpuhotplug.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index f07529ae4895..755346ea9819 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -210,13 +210,6 @@ static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu) !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity)) return; - /* - * Don't restore suspended interrupts here when a system comes back - * from S3. They are reenabled via resume_device_irqs(). - */ - if (desc->istate & IRQS_SUSPENDED) - return; - if (irqd_is_managed_and_shutdown(data)) irq_startup_managed(desc); From 8a2277a3c9e4cc5398f80821afe7ecbe9bdf2819 Mon Sep 17 00:00:00 2001 From: Gyeyoung Baek Date: Thu, 12 Jun 2025 21:48:27 +0900 Subject: [PATCH 281/497] genirq/irq_sim: Initialize work context pointers properly Initialize `ops` member's pointers properly by using kzalloc() instead of kmalloc() when allocating the simulation work context. Otherwise the pointers contain random content leading to invalid dereferencing. Signed-off-by: Gyeyoung Baek Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250612124827.63259-1-gye976@gmail.com --- kernel/irq/irq_sim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 1a3d483548e2..ae4c9cbd1b4b 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -202,7 +202,7 @@ struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode, void *data) { struct irq_sim_work_ctx *work_ctx __free(kfree) = - kmalloc(sizeof(*work_ctx), GFP_KERNEL); + kzalloc(sizeof(*work_ctx), GFP_KERNEL); if (!work_ctx) return ERR_PTR(-ENOMEM); From 80626ae6ffe57917915c6e6d8ea1e908689954fd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 May 2025 14:15:12 +0100 Subject: [PATCH 282/497] drm/nouveau/gsp: Fix potential integer overflow on integer shifts The left shift int 32 bit integer constants 1 is evaluated using 32 bit arithmetic and then assigned to a 64 bit unsigned integer. In the case where the shift is 32 or more this can lead to an overflow. Avoid this by shifting using the BIT_ULL macro instead. Fixes: 6c3ac7bcfcff ("drm/nouveau/gsp: support deeper page tables in COPY_SERVER_RESERVED_PDES") Signed-off-by: Colin Ian King Signed-off-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250522131512.2768310-1-colin.i.king@gmail.com --- drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c index 52f2e5f14517..f25ea610cd99 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/vmm.c @@ -121,7 +121,7 @@ r535_mmu_vaspace_new(struct nvkm_vmm *vmm, u32 handle, bool external) page_shift -= desc->bits; ctrl->levels[i].physAddress = pd->pt[0]->addr; - ctrl->levels[i].size = (1 << desc->bits) * desc->size; + ctrl->levels[i].size = BIT_ULL(desc->bits) * desc->size; ctrl->levels[i].aperture = 1; ctrl->levels[i].pageShift = page_shift; From 9802f0a63b641f4cddb2139c814c2e95cb825099 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 27 May 2025 16:37:12 +0000 Subject: [PATCH 283/497] drm/nouveau: fix a use-after-free in r535_gsp_rpc_push() The RPC container is released after being passed to r535_gsp_rpc_send(). When sending the initial fragment of a large RPC and passing the caller's RPC container, the container will be freed prematurely. Subsequent attempts to send remaining fragments will therefore result in a use-after-free. Allocate a temporary RPC container for holding the initial fragment of a large RPC when sending. Free the caller's container when all fragments are successfully sent. Fixes: 176fdcbddfd2 ("drm/nouveau/gsp/r535: add support for booting GSP-RM") Signed-off-by: Zhi Wang Link: https://lore.kernel.org/r/20250527163712.3444-1-zhiw@nvidia.com [ Rebase onto Blackwell changes. - Danilo ] Signed-off-by: Danilo Krummrich --- .../drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c index 5acb98d137bd..9d06ff722fea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c @@ -637,12 +637,18 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, if (payload_size > max_payload_size) { const u32 fn = rpc->function; u32 remain_payload_size = payload_size; + void *next; - /* Adjust length, and send initial RPC. */ - rpc->length = sizeof(*rpc) + max_payload_size; - msg->checksum = rpc->length; + /* Send initial RPC. */ + next = r535_gsp_rpc_get(gsp, fn, max_payload_size); + if (IS_ERR(next)) { + repv = next; + goto done; + } - repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0); + memcpy(next, payload, max_payload_size); + + repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0); if (IS_ERR(repv)) goto done; @@ -653,7 +659,6 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, while (remain_payload_size) { u32 size = min(remain_payload_size, max_payload_size); - void *next; next = r535_gsp_rpc_get(gsp, NV_VGPU_MSG_FUNCTION_CONTINUATION_RECORD, size); if (IS_ERR(next)) { @@ -674,6 +679,8 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, /* Wait for reply. */ repv = r535_gsp_rpc_handle_reply(gsp, fn, policy, payload_size + sizeof(*rpc)); + if (!IS_ERR(repv)) + kvfree(msg); } else { repv = r535_gsp_rpc_send(gsp, payload, policy, gsp_rpc_len); } From 61b2b3737499f1fb361a54a16828db24a8345e85 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Jun 2025 14:54:51 -0700 Subject: [PATCH 284/497] drm/nouveau/bl: increase buffer size to avoid truncate warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nouveau_get_backlight_name() function generates a unique name for the backlight interface, appending an id from 1 to 99 for all backlight devices after the first. GCC 15 (and likely other compilers) produce the following -Wformat-truncation warning: nouveau_backlight.c: In function ‘nouveau_backlight_init’: nouveau_backlight.c:56:69: error: ‘%d’ directive output may be truncated writing between 1 and 10 bytes into a region of size 3 [-Werror=format-truncation=] 56 | snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb); | ^~ In function ‘nouveau_get_backlight_name’, inlined from ‘nouveau_backlight_init’ at nouveau_backlight.c:351:7: nouveau_backlight.c:56:56: note: directive argument in the range [1, 2147483647] 56 | snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb); | ^~~~~~~~~~~~~~~~ nouveau_backlight.c:56:17: note: ‘snprintf’ output between 14 and 23 bytes into a destination of size 15 56 | snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The warning started appearing after commit ab244be47a8f ("drm/nouveau: Fix a potential theorical leak in nouveau_get_backlight_name()") This fix for the ida usage removed the explicit value check for ids larger than 99. The compiler is unable to intuit that the ida_alloc_max() limits the returned value range between 0 and 99. Because the compiler can no longer infer that the number ranges from 0 to 99, it thinks that it could use as many as 11 digits (10 + the potential - sign for negative numbers). The warning has gone unfixed for some time, with at least one kernel test robot report. The code breaks W=1 builds, which is especially frustrating with the introduction of CONFIG_WERROR. The string is stored temporarily on the stack and then copied into the device name. Its not a big deal to use 11 more bytes of stack rounding out to an even 24 bytes. Increase BL_NAME_SIZE to 24 to avoid the truncation warning. This fixes the W=1 builds that include this driver. Compile tested only. Fixes: ab244be47a8f ("drm/nouveau: Fix a potential theorical leak in nouveau_get_backlight_name()") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202312050324.0kv4PnfZ-lkp@intel.com/ Suggested-by: Timur Tabi Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20250610-jk-nouveua-drm-bl-snprintf-fix-v2-1-7fdd4b84b48e@intel.com Signed-off-by: Danilo Krummrich --- drivers/gpu/drm/nouveau/nouveau_backlight.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index d47442125fa1..9aae26eb7d8f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -42,7 +42,7 @@ #include "nouveau_acpi.h" static struct ida bl_ida; -#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0' +#define BL_NAME_SIZE 24 // 12 for name + 11 for digits + 1 for '\0' static bool nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], From 553ab30a181043f01b99a493ba13f9c011eb75ae Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 11 Jun 2025 09:08:06 +0700 Subject: [PATCH 285/497] Documentation: nouveau: Update GSP message queue kernel-doc reference GSP message queue docs has been moved following RPC handling split in commit 8a8b1ec5261f20 ("drm/nouveau/gsp: split rpc handling out on its own"), before GSP-RM implementation is versioned in commit c472d828348caf ("drm/nouveau/gsp: move subdev/engine impls to subdev/gsp/rm/r535/"). However, the kernel-doc reference in nouveau docs is left behind, which triggers htmldocs warnings: ERROR: Cannot find file ./drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c WARNING: No kernel-doc for file ./drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c Update the reference. Fixes: c472d828348c ("drm/nouveau/gsp: move subdev/engine impls to subdev/gsp/rm/r535/") Fixes: 8a8b1ec5261f ("drm/nouveau/gsp: split rpc handling out on its own") Signed-off-by: Bagas Sanjaya Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250611020805.22418-2-bagasdotme@gmail.com Signed-off-by: Danilo Krummrich --- Documentation/gpu/nouveau.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst index b8c801e0068c..cab2e81013bc 100644 --- a/Documentation/gpu/nouveau.rst +++ b/Documentation/gpu/nouveau.rst @@ -25,7 +25,7 @@ providing a consistent API to upper layers of the driver stack. GSP Support ------------------------ -.. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +.. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/rpc.c :doc: GSP message queue element .. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h From f9705d66fa7107fcd619083f7aae2afb0554a593 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 3 Jun 2025 16:14:45 -0300 Subject: [PATCH 286/497] iommu/tegra: Fix incorrect size calculation This driver uses a mixture of ways to get the size of a PTE, tegra_smmu_set_pde() did it as sizeof(*pd) which became wrong when pd switched to a struct tegra_pd. Switch pd back to a u32* in tegra_smmu_set_pde() so the sizeof(*pd) returns 4. Fixes: 50568f87d1e2 ("iommu/terga: Do not use struct page as the handle for as->pd memory") Reported-by: Diogo Ivo Closes: https://lore.kernel.org/all/62e7f7fe-6200-4e4f-ad42-d58ad272baa6@tecnico.ulisboa.pt/ Signed-off-by: Jason Gunthorpe Acked-by: Thierry Reding Reviewed-by: Jerry Snitselaar Tested-by: Diogo Ivo Link: https://lore.kernel.org/r/0-v1-da7b8b3d57eb+ce-iommu_terga_sizeof_jgg@nvidia.com Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 61897d50162d..e58fe9d8b9e7 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -559,11 +559,11 @@ static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova, { unsigned int pd_index = iova_pd_index(iova); struct tegra_smmu *smmu = as->smmu; - struct tegra_pd *pd = as->pd; + u32 *pd = &as->pd->val[pd_index]; unsigned long offset = pd_index * sizeof(*pd); /* Set the page directory entry first */ - pd->val[pd_index] = value; + *pd = value; /* The flush the page directory entry from caches */ dma_sync_single_range_for_device(smmu->dev, as->pd_dma, offset, From db3dfae1a2f662e69d535827703bcdbb04b8d72b Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Fri, 13 Jun 2025 09:38:57 +0700 Subject: [PATCH 287/497] Documentation: ublk: Separate UBLK_F_AUTO_BUF_REG fallback behavior sublists Stephen Rothwell reports htmldocs warning on ublk docs: Documentation/block/ublk.rst:414: ERROR: Unexpected indentation. [docutils] Fix the warning by separating sublists of auto buffer registration fallback behavior from their appropriate parent list item. Fixes: ff20c516485e ("ublk: document auto buffer registration(UBLK_F_AUTO_BUF_REG)") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/linux-next/20250612132638.193de386@canb.auug.org.au/ Signed-off-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20250613023857.15971-1-bagasdotme@gmail.com Signed-off-by: Jens Axboe --- Documentation/block/ublk.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index abec524a04ed..8c4030bcabb6 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -408,6 +408,7 @@ Fallback Behavior If auto buffer registration fails: 1. When ``UBLK_AUTO_BUF_REG_FALLBACK`` is enabled: + - The uring_cmd is completed - ``UBLK_IO_F_NEED_REG_BUF`` is set in ``ublksrv_io_desc.op_flags`` - The ublk server must manually deal with the failure, such as, register @@ -415,6 +416,7 @@ If auto buffer registration fails: for handling ublk IO 2. If fallback is not enabled: + - The ublk I/O request fails silently - The uring_cmd won't be completed From d57e92dd660014ccac884eda616cafc7b04601e0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 13 Jun 2025 14:30:37 +0200 Subject: [PATCH 288/497] spi: tegra210-qspi: Remove cache operations The DMA memory for this driver is allocated using dma_alloc_coherent(), which ends up mapping the allocated memory as uncached. Performing the various dma_sync_*() operations on this memory causes issues during SPI flashing: [ 7.818017] pc : dcache_inval_poc+0x40/0x58 [ 7.822128] lr : arch_sync_dma_for_cpu+0x2c/0x4c [ 7.826854] sp : ffff80008193bcf0 [ 7.830267] x29: ffff80008193bcf0 x28: ffffa3fe5ff1e908 x27: ffffa3fe627bb130 [ 7.837528] x26: ffff000086952180 x25: ffff00008015c8ac x24: ffff000086c9b480 [ 7.844878] x23: ffff00008015c800 x22: 0000000000000002 x21: 0000000000010000 [ 7.852229] x20: 0000000106dae000 x19: ffff000080112410 x18: 0000000000000001 [ 7.859580] x17: ffff000080159400 x16: ffffa3fe607a9bd8 x15: ffff0000eac1b180 [ 7.866753] x14: 000000000000000c x13: 0000000000000001 x12: 000000000000025a [ 7.874104] x11: 0000000000000000 x10: 7f73e96357f6a07f x9 : db1fc8072a7f5e3a [ 7.881365] x8 : ffff000086c9c588 x7 : ffffa3fe607a9bd8 x6 : ffff80008193bc28 [ 7.888630] x5 : 000000000000ffff x4 : 0000000000000009 x3 : 000000000000003f [ 7.895892] x2 : 0000000000000040 x1 : ffff000086dbe000 x0 : ffff000086db0000 [ 7.903155] Call trace: [ 7.905606] dcache_inval_poc+0x40/0x58 (P) [ 7.909804] iommu_dma_sync_single_for_cpu+0xb4/0xb8 [ 7.914617] __dma_sync_single_for_cpu+0x158/0x194 [ 7.919428] __this_module+0x5b020/0x5baf8 [spi_tegra210_quad] [ 7.925291] irq_thread_fn+0x2c/0xc0 [ 7.928966] irq_thread+0x16c/0x318 [ 7.932467] kthread+0x12c/0x214 Fix this by removing all calls to the dma_sync_*() functions. This isn't ideal because DMA is used only for relatively large (> 64 words or 256 bytes) and using uncached memory for this might be slow. Reworking this to use cached memory for faster access and reintroducing the cache maintenance calls is probably worth a follow-up patch. Reported-by: Brad Griffis Fixes: 017f1b0bae08 ("spi: tegra210-quad: Add support for internal DMA") Signed-off-by: Thierry Reding Link: https://patch.msgid.link/20250613123037.2082788-1-thierry.reding@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 3581757a269b..3be7499db21e 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -407,9 +407,6 @@ tegra_qspi_read_rx_fifo_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_tra static void tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_transfer *t) { - dma_sync_single_for_cpu(tqspi->dev, tqspi->tx_dma_phys, - tqspi->dma_buf_size, DMA_TO_DEVICE); - /* * In packed mode, each word in FIFO may contain multiple packets * based on bits per word. So all bytes in each FIFO word are valid. @@ -442,17 +439,11 @@ tegra_qspi_copy_client_txbuf_to_qspi_txbuf(struct tegra_qspi *tqspi, struct spi_ tqspi->cur_tx_pos += write_bytes; } - - dma_sync_single_for_device(tqspi->dev, tqspi->tx_dma_phys, - tqspi->dma_buf_size, DMA_TO_DEVICE); } static void tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_transfer *t) { - dma_sync_single_for_cpu(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); - if (tqspi->is_packed) { tqspi->cur_rx_pos += tqspi->curr_dma_words * tqspi->bytes_per_word; } else { @@ -478,9 +469,6 @@ tegra_qspi_copy_qspi_rxbuf_to_client_rxbuf(struct tegra_qspi *tqspi, struct spi_ tqspi->cur_rx_pos += read_bytes; } - - dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); } static void tegra_qspi_dma_complete(void *args) @@ -701,8 +689,6 @@ static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct return ret; } - dma_sync_single_for_device(tqspi->dev, tqspi->rx_dma_phys, - tqspi->dma_buf_size, DMA_FROM_DEVICE); ret = tegra_qspi_start_rx_dma(tqspi, t, len); if (ret < 0) { dev_err(tqspi->dev, "failed to start RX DMA: %d\n", ret); From ab107276607af90b13a5994997e19b7b9731e251 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sat, 17 May 2025 19:52:37 +0530 Subject: [PATCH 289/497] powerpc: Fix struct termio related ioctl macros Since termio interface is now obsolete, include/uapi/asm/ioctls.h has some constant macros referring to "struct termio", this caused build failure at userspace. In file included from /usr/include/asm/ioctl.h:12, from /usr/include/asm/ioctls.h:5, from tst-ioctls.c:3: tst-ioctls.c: In function 'get_TCGETA': tst-ioctls.c:12:10: error: invalid application of 'sizeof' to incomplete type 'struct termio' 12 | return TCGETA; | ^~~~~~ Even though termios.h provides "struct termio", trying to juggle definitions around to make it compile could introduce regressions. So better to open code it. Reported-by: Tulio Magno Suggested-by: Nicholas Piggin Tested-by: Justin M. Forbes Reviewed-by: Michael Ellerman Closes: https://lore.kernel.org/linuxppc-dev/8734dji5wl.fsf@ascii.art.br/ Signed-off-by: Madhavan Srinivasan Link: https://patch.msgid.link/20250517142237.156665-1-maddy@linux.ibm.com --- arch/powerpc/include/uapi/asm/ioctls.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h index 2c145da3b774..b5211e413829 100644 --- a/arch/powerpc/include/uapi/asm/ioctls.h +++ b/arch/powerpc/include/uapi/asm/ioctls.h @@ -23,10 +23,10 @@ #define TCSETSW _IOW('t', 21, struct termios) #define TCSETSF _IOW('t', 22, struct termios) -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) +#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */ +#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */ +#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */ +#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */ #define TCSBRK _IO('t', 29) #define TCXONC _IO('t', 30) From 26ec15e4b0c1d7b25214d9c0be1d50492e2f006c Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jun 2025 11:01:49 -0600 Subject: [PATCH 290/497] io_uring/kbuf: don't truncate end buffer for multiple buffer peeks If peeking a bunch of buffers, normally io_ring_buffers_peek() will truncate the end buffer. This isn't optimal as presumably more data will be arriving later, and hence it's better to stop with the last full buffer rather than truncate the end buffer. Cc: stable@vger.kernel.org Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") Reported-by: Christian Mazakas Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 2ea65f3cef72..ce95e3af44a9 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, /* truncate end piece, if needed, for non partial buffers */ if (len > arg->max_len) { len = arg->max_len; - if (!(bl->flags & IOBL_INC)) + if (!(bl->flags & IOBL_INC)) { + if (iov != arg->iovs) + break; buf->len = len; + } } iov->iov_base = u64_to_user_ptr(buf->addr); From f90fff1e152dedf52b932240ebbd670d83330eca Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 13 Jun 2025 19:26:50 +0200 Subject: [PATCH 291/497] posix-cpu-timers: fix race between handle_posix_cpu_timers() and posix_cpu_timer_del() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an exiting non-autoreaping task has already passed exit_notify() and calls handle_posix_cpu_timers() from IRQ, it can be reaped by its parent or debugger right after unlock_task_sighand(). If a concurrent posix_cpu_timer_del() runs at that moment, it won't be able to detect timer->it.cpu.firing != 0: cpu_timer_task_rcu() and/or lock_task_sighand() will fail. Add the tsk->exit_state check into run_posix_cpu_timers() to fix this. This fix is not needed if CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y, because exit_task_work() is called before exit_notify(). But the check still makes sense, task_work_add(&tsk->posix_cputimers_work.work) will fail anyway in this case. Cc: stable@vger.kernel.org Reported-by: Benoît Sevens Fixes: 0bdd2ed4138e ("sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()") Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- kernel/time/posix-cpu-timers.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 50e8d04ab661..2e5b89d7d866 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1405,6 +1405,15 @@ void run_posix_cpu_timers(void) lockdep_assert_irqs_disabled(); + /* + * Ensure that release_task(tsk) can't happen while + * handle_posix_cpu_timers() is running. Otherwise, a concurrent + * posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and + * miss timer->it.cpu.firing != 0. + */ + if (tsk->exit_state) + return; + /* * If the actual expiry is deferred to task work context and the * work is already scheduled there is no point to do anything here. From 907a7a2e5bf40c6a359b2f6cc53d6fdca04009e0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 11 Jun 2025 18:31:16 -0500 Subject: [PATCH 292/497] PCI/PM: Set up runtime PM even for devices without PCI PM 4d4c10f763d7 ("PCI: Explicitly put devices into D0 when initializing") intended to put PCI devices into D0, but in doing so unintentionally changed runtime PM initialization not to occur on devices that don't support PCI PM. This caused a regression in vfio-pci due to an imbalance with its use. Adjust the logic in pci_pm_init() so that even if PCI PM isn't supported runtime PM is still initialized. Fixes: 4d4c10f763d7 ("PCI: Explicitly put devices into D0 when initializing") Reported-by: Giovanni Cabiddu Closes: https://lore.kernel.org/linux-pci/20250424043232.1848107-1-superm1@kernel.org/T/#m7e8929d6421690dc8bd6dc639d86c2b4db27cbc4 Reported-by: Nicolas Dichtel Closes: https://lore.kernel.org/linux-pci/20250424043232.1848107-1-superm1@kernel.org/T/#m40d277dcdb9be64a1609a82412d1aa906263e201 Signed-off-by: Mario Limonciello Signed-off-by: Bjorn Helgaas Tested-by: Giovanni Cabiddu Tested-by: Nicolas Dichtel Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Rafael J. Wysocki Cc: Alex Williamson Link: https://patch.msgid.link/20250611233117.61810-1-superm1@kernel.org --- drivers/pci/pci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9448d55113b..9e42090fb108 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3217,14 +3217,14 @@ void pci_pm_init(struct pci_dev *dev) /* find PCI PM capability in list */ pm = pci_find_capability(dev, PCI_CAP_ID_PM); if (!pm) - return; + goto poweron; /* Check device's ability to generate PME# */ pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { pci_err(dev, "unsupported PM cap regs version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); - return; + goto poweron; } dev->pm_cap = pm; @@ -3269,6 +3269,7 @@ void pci_pm_init(struct pci_dev *dev) pci_read_config_word(dev, PCI_STATUS, &status); if (status & PCI_STATUS_IMM_READY) dev->imm_ready = 1; +poweron: pci_pm_power_up_and_verify_state(dev); pm_runtime_forbid(&dev->dev); pm_runtime_set_active(&dev->dev); From 9ce6c9875f3e995be5fd720b65835291f8a609b1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jun 2025 13:37:41 -0600 Subject: [PATCH 293/497] nvme: always punt polled uring_cmd end_io work to task_work Currently NVMe uring_cmd completions will complete locally, if they are polled. This is done because those completions are always invoked from task context. And while that is true, there's no guarantee that it's invoked under the right ring context, or even task. If someone does NVMe passthrough via multiple threads and with a limited number of poll queues, then ringA may find completions from ringB. For that case, completing the request may not be sound. Always just punt the passthrough completions via task_work, which will redirect the completion, if needed. Cc: stable@vger.kernel.org Fixes: 585079b6e425 ("nvme: wire up async polling for io passthrough commands") Signed-off-by: Jens Axboe --- drivers/nvme/host/ioctl.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 0b50da2f1175..6b3ac8ae3f34 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, pdu->result = le64_to_cpu(nvme_req(req)->result.u64); /* - * For iopoll, complete it directly. Note that using the uring_cmd - * helper for this is safe only because we check blk_rq_is_poll(). - * As that returns false if we're NOT on a polled queue, then it's - * safe to use the polled completion helper. - * - * Otherwise, move the completion to task work. + * IOPOLL could potentially complete this request directly, but + * if multiple rings are polling on the same queue, then it's possible + * for one ring to find completions for another ring. Punting the + * completion via task_work will always direct it to the right + * location, rather than potentially complete requests for ringA + * under iopoll invocations from ringB. */ - if (blk_rq_is_poll(req)) { - if (pdu->bio) - blk_rq_unmap_user(pdu->bio); - io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); - } else { - io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); - } - + io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); return RQ_END_IO_FREE; } From b62e0efd8a8571460d05922862a451855ebdf3c6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Jun 2025 15:24:53 -0600 Subject: [PATCH 294/497] io_uring: run local task_work from ring exit IOPOLL reaping In preparation for needing to shift NVMe passthrough to always use task_work for polled IO completions, ensure that those are suitably run at exit time. See commit: 9ce6c9875f3e ("nvme: always punt polled uring_cmd end_io work to task_work") for details on why that is necessary. Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4e32f808d07d..5111ec040c53 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1523,6 +1523,9 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) } } mutex_unlock(&ctx->uring_lock); + + if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) + io_move_task_work_from_local(ctx); } static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events) From 1b56e765bf8990f1f60e124926c11fc4ac63d752 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 12 Jun 2025 14:17:13 +0200 Subject: [PATCH 295/497] rust: completion: implement initial abstraction Implement a minimal abstraction for the completion synchronization primitive. This initial abstraction only adds complete_all() and wait_for_completion(), since that is what is required for the subsequent Devres patch. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Valentin Schneider Reviewed-by: Alice Ryhl Reviewed-by: Boqun Feng Reviewed-by: Benno Lossin Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250612121817.1621-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/bindings/bindings_helper.h | 1 + rust/helpers/completion.c | 8 +++ rust/helpers/helpers.c | 1 + rust/kernel/sync.rs | 2 + rust/kernel/sync/completion.rs | 112 ++++++++++++++++++++++++++++++++ 5 files changed, 124 insertions(+) create mode 100644 rust/helpers/completion.c create mode 100644 rust/kernel/sync/completion.rs diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index bc494745f67b..8cbb660e2ec2 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff --git a/rust/helpers/completion.c b/rust/helpers/completion.c new file mode 100644 index 000000000000..b2443262a2ae --- /dev/null +++ b/rust/helpers/completion.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +void rust_helper_init_completion(struct completion *x) +{ + init_completion(x); +} diff --git a/rust/helpers/helpers.c b/rust/helpers/helpers.c index 0f1b5d115985..6451cfd94a8d 100644 --- a/rust/helpers/helpers.c +++ b/rust/helpers/helpers.c @@ -13,6 +13,7 @@ #include "build_assert.c" #include "build_bug.c" #include "clk.c" +#include "completion.c" #include "cpufreq.c" #include "cpumask.c" #include "cred.c" diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 36a719015583..c23a12639924 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -10,6 +10,7 @@ use pin_init; mod arc; +pub mod completion; mod condvar; pub mod lock; mod locked_by; @@ -17,6 +18,7 @@ pub mod rcu; pub use arc::{Arc, ArcBorrow, UniqueArc}; +pub use completion::Completion; pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult}; pub use lock::global::{global_lock, GlobalGuard, GlobalLock, GlobalLockBackend, GlobalLockedBy}; pub use lock::mutex::{new_mutex, Mutex, MutexGuard}; diff --git a/rust/kernel/sync/completion.rs b/rust/kernel/sync/completion.rs new file mode 100644 index 000000000000..c50012a940a3 --- /dev/null +++ b/rust/kernel/sync/completion.rs @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Completion support. +//! +//! Reference: +//! +//! C header: [`include/linux/completion.h`](srctree/include/linux/completion.h) + +use crate::{bindings, prelude::*, types::Opaque}; + +/// Synchronization primitive to signal when a certain task has been completed. +/// +/// The [`Completion`] synchronization primitive signals when a certain task has been completed by +/// waking up other tasks that have been queued up to wait for the [`Completion`] to be completed. +/// +/// # Examples +/// +/// ``` +/// use kernel::sync::{Arc, Completion}; +/// use kernel::workqueue::{self, impl_has_work, new_work, Work, WorkItem}; +/// +/// #[pin_data] +/// struct MyTask { +/// #[pin] +/// work: Work, +/// #[pin] +/// done: Completion, +/// } +/// +/// impl_has_work! { +/// impl HasWork for MyTask { self.work } +/// } +/// +/// impl MyTask { +/// fn new() -> Result> { +/// let this = Arc::pin_init(pin_init!(MyTask { +/// work <- new_work!("MyTask::work"), +/// done <- Completion::new(), +/// }), GFP_KERNEL)?; +/// +/// let _ = workqueue::system().enqueue(this.clone()); +/// +/// Ok(this) +/// } +/// +/// fn wait_for_completion(&self) { +/// self.done.wait_for_completion(); +/// +/// pr_info!("Completion: task complete\n"); +/// } +/// } +/// +/// impl WorkItem for MyTask { +/// type Pointer = Arc; +/// +/// fn run(this: Arc) { +/// // process this task +/// this.done.complete_all(); +/// } +/// } +/// +/// let task = MyTask::new()?; +/// task.wait_for_completion(); +/// # Ok::<(), Error>(()) +/// ``` +#[pin_data] +pub struct Completion { + #[pin] + inner: Opaque, +} + +// SAFETY: `Completion` is safe to be send to any task. +unsafe impl Send for Completion {} + +// SAFETY: `Completion` is safe to be accessed concurrently. +unsafe impl Sync for Completion {} + +impl Completion { + /// Create an initializer for a new [`Completion`]. + pub fn new() -> impl PinInit { + pin_init!(Self { + inner <- Opaque::ffi_init(|slot: *mut bindings::completion| { + // SAFETY: `slot` is a valid pointer to an uninitialized `struct completion`. + unsafe { bindings::init_completion(slot) }; + }), + }) + } + + fn as_raw(&self) -> *mut bindings::completion { + self.inner.get() + } + + /// Signal all tasks waiting on this completion. + /// + /// This method wakes up all tasks waiting on this completion; after this operation the + /// completion is permanently done, i.e. signals all current and future waiters. + pub fn complete_all(&self) { + // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`. + unsafe { bindings::complete_all(self.as_raw()) }; + } + + /// Wait for completion of a task. + /// + /// This method waits for the completion of a task; it is not interruptible and there is no + /// timeout. + /// + /// See also [`Completion::complete_all`]. + pub fn wait_for_completion(&self) { + // SAFETY: `self.as_raw()` is a pointer to a valid `struct completion`. + unsafe { bindings::wait_for_completion(self.as_raw()) }; + } +} From 4b76fafb20dd4a2becb94949d78e86bc88006509 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 12 Jun 2025 14:17:14 +0200 Subject: [PATCH 296/497] rust: revocable: indicate whether `data` has been revoked already Return a boolean from Revocable::revoke() and Revocable::revoke_nosync() to indicate whether the data has been revoked already. Return true if the data hasn't been revoked yet (i.e. this call revoked the data), false otherwise. This is required by Devres in order to synchronize the completion of the revoke process. Reviewed-by: Benno Lossin Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250612121817.1621-3-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/revocable.rs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs index db4aa46bb121..06a3cdfce344 100644 --- a/rust/kernel/revocable.rs +++ b/rust/kernel/revocable.rs @@ -154,8 +154,10 @@ pub unsafe fn access(&self) -> &T { /// # Safety /// /// Callers must ensure that there are no more concurrent users of the revocable object. - unsafe fn revoke_internal(&self) { - if self.is_available.swap(false, Ordering::Relaxed) { + unsafe fn revoke_internal(&self) -> bool { + let revoke = self.is_available.swap(false, Ordering::Relaxed); + + if revoke { if SYNC { // SAFETY: Just an FFI call, there are no further requirements. unsafe { bindings::synchronize_rcu() }; @@ -165,6 +167,8 @@ unsafe fn revoke_internal(&self) { // `compare_exchange` above that takes `is_available` from `true` to `false`. unsafe { drop_in_place(self.data.get()) }; } + + revoke } /// Revokes access to and drops the wrapped object. @@ -172,10 +176,13 @@ unsafe fn revoke_internal(&self) { /// Access to the object is revoked immediately to new callers of [`Revocable::try_access`], /// expecting that there are no concurrent users of the object. /// + /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked + /// already. + /// /// # Safety /// /// Callers must ensure that there are no more concurrent users of the revocable object. - pub unsafe fn revoke_nosync(&self) { + pub unsafe fn revoke_nosync(&self) -> bool { // SAFETY: By the safety requirement of this function, the caller ensures that nobody is // accessing the data anymore and hence we don't have to wait for the grace period to // finish. @@ -189,7 +196,10 @@ pub unsafe fn revoke_nosync(&self) { /// If there are concurrent users of the object (i.e., ones that called /// [`Revocable::try_access`] beforehand and still haven't dropped the returned guard), this /// function waits for the concurrent access to complete before dropping the wrapped object. - pub fn revoke(&self) { + /// + /// Returns `true` if `&self` has been revoked with this call, `false` if it was revoked + /// already. + pub fn revoke(&self) -> bool { // SAFETY: By passing `true` we ask `revoke_internal` to wait for the grace period to // finish. unsafe { self.revoke_internal::() } From f744201c6159fc7323c40936fd079525f7063598 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 12 Jun 2025 14:17:15 +0200 Subject: [PATCH 297/497] rust: devres: fix race in Devres::drop() In Devres::drop() we first remove the devres action and then drop the wrapped device resource. The design goal is to give the owner of a Devres object control over when the device resource is dropped, but limit the overall scope to the corresponding device being bound to a driver. However, there's a race that was introduced with commit 8ff656643d30 ("rust: devres: remove action in `Devres::drop`"), but also has been (partially) present from the initial version on. In Devres::drop(), the devres action is removed successfully and subsequently the destructor of the wrapped device resource runs. However, there is no guarantee that the destructor of the wrapped device resource completes before the driver core is done unbinding the corresponding device. If in Devres::drop(), the devres action can't be removed, it means that the devres callback has been executed already, or is still running concurrently. In case of the latter, either Devres::drop() wins revoking the Revocable or the devres callback wins revoking the Revocable. If Devres::drop() wins, we (again) have no guarantee that the destructor of the wrapped device resource completes before the driver core is done unbinding the corresponding device. CPU0 CPU1 ------------------------------------------------------------------------ Devres::drop() { Devres::devres_callback() { self.data.revoke() { this.data.revoke() { is_available.swap() == true is_available.swap == false } } // [...] // device fully unbound drop_in_place() { // release device resource } } } Depending on the specific device resource, this can potentially lead to user-after-free bugs. In order to fix this, implement the following logic. In the devres callback, we're always good when we get to revoke the device resource ourselves, i.e. Revocable::revoke() returns true. If Revocable::revoke() returns false, it means that Devres::drop(), concurrently, already drops the device resource and we have to wait for Devres::drop() to signal that it finished dropping the device resource. Note that if we hit the case where we need to wait for the completion of Devres::drop() in the devres callback, it means that we're actually racing with a concurrent Devres::drop() call, which already started revoking the device resource for us. This is rather unlikely and means that the concurrent Devres::drop() already started doing our work and we just need to wait for it to complete it for us. Hence, there should not be any additional overhead from that. (Actually, for now it's even better if Devres::drop() does the work for us, since it can bypass the synchronize_rcu() call implied by Revocable::revoke(), but this goes away anyways once I get to implement the split devres callback approach, which allows us to first flip the atomics of all registered Devres objects of a certain device, execute a single synchronize_rcu() and then drop all revocable objects.) In Devres::drop() we try to revoke the device resource. If that is *not* successful, it means that the devres callback already did and we're good. Otherwise, we try to remove the devres action, which, if successful, means that we're good, since the device resource has just been revoked by us *before* we removed the devres action successfully. If the devres action could not be removed, it means that the devres callback must be running concurrently, hence we signal that the device resource has been revoked by us, using the completion. This makes it safe to drop a Devres object from any task and at any point of time, which is one of the design goals. Fixes: 76c01ded724b ("rust: add devres abstraction") Reported-by: Alice Ryhl Closes: https://lore.kernel.org/lkml/aD64YNuqbPPZHAa5@google.com/ Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250612121817.1621-4-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/devres.rs | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 0f79a2ec9474..2f74bce5ed9d 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -13,7 +13,7 @@ ffi::c_void, prelude::*, revocable::Revocable, - sync::Arc, + sync::{Arc, Completion}, types::ARef, }; @@ -25,13 +25,17 @@ struct DevresInner { callback: unsafe extern "C" fn(*mut c_void), #[pin] data: Revocable, + #[pin] + revoke: Completion, } /// This abstraction is meant to be used by subsystems to containerize [`Device`] bound resources to /// manage their lifetime. /// /// [`Device`] bound resources should be freed when either the resource goes out of scope or the -/// [`Device`] is unbound respectively, depending on what happens first. +/// [`Device`] is unbound respectively, depending on what happens first. In any case, it is always +/// guaranteed that revoking the device resource is completed before the corresponding [`Device`] +/// is unbound. /// /// To achieve that [`Devres`] registers a devres callback on creation, which is called once the /// [`Device`] is unbound, revoking access to the encapsulated resource (see also [`Revocable`]). @@ -102,6 +106,7 @@ fn new(dev: &Device, data: T, flags: Flags) -> Result> dev: dev.into(), callback: Self::devres_callback, data <- Revocable::new(data), + revoke <- Completion::new(), }), flags, )?; @@ -130,26 +135,28 @@ fn as_ptr(&self) -> *const Self { self as _ } - fn remove_action(this: &Arc) { + fn remove_action(this: &Arc) -> bool { // SAFETY: // - `self.inner.dev` is a valid `Device`, // - the `action` and `data` pointers are the exact same ones as given to devm_add_action() // previously, // - `self` is always valid, even if the action has been released already. - let ret = unsafe { + let success = unsafe { bindings::devm_remove_action_nowarn( this.dev.as_raw(), Some(this.callback), this.as_ptr() as _, ) - }; + } == 0; - if ret == 0 { + if success { // SAFETY: We leaked an `Arc` reference to devm_add_action() in `DevresInner::new`; if // devm_remove_action_nowarn() was successful we can (and have to) claim back ownership // of this reference. let _ = unsafe { Arc::from_raw(this.as_ptr()) }; } + + success } #[allow(clippy::missing_safety_doc)] @@ -161,7 +168,12 @@ fn remove_action(this: &Arc) { // `DevresInner::new`. let inner = unsafe { Arc::from_raw(ptr) }; - inner.data.revoke(); + if !inner.data.revoke() { + // If `revoke()` returns false, it means that `Devres::drop` already started revoking + // `inner.data` for us. Hence we have to wait until `Devres::drop()` signals that it + // completed revoking `inner.data`. + inner.revoke.wait_for_completion(); + } } } @@ -232,6 +244,15 @@ fn deref(&self) -> &Self::Target { impl Drop for Devres { fn drop(&mut self) { - DevresInner::remove_action(&self.0); + // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data + // anymore, hence it is safe not to wait for the grace period to finish. + if unsafe { self.revoke_nosync() } { + // We revoked `self.0.data` before the devres action did, hence try to remove it. + if !DevresInner::remove_action(&self.0) { + // We could not remove the devres action, which means that it now runs concurrently, + // hence signal that `self.0.data` has been revoked successfully. + self.0.revoke.complete_all(); + } + } } } From 20c96ed278e362ae4e324ed7d8c69fb48c508d3c Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Wed, 11 Jun 2025 19:48:25 +0200 Subject: [PATCH 298/497] rust: devres: do not dereference to the internal Revocable We can't expose direct access to the internal Revocable, since this allows users to directly revoke the internal Revocable without Devres having the chance to synchronize with the devres callback -- we have to guarantee that the internal Revocable has been fully revoked before the device is fully unbound. Hence, remove the corresponding Deref implementation and, instead, provide indirect accessors for the internal Revocable. Note that we can still support Devres::revoke() by implementing the required synchronization (which would be almost identical to the synchronization in Devres::drop()). Fixes: 76c01ded724b ("rust: add devres abstraction") Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20250611174827.380555-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- rust/kernel/devres.rs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 2f74bce5ed9d..57502534d985 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -12,13 +12,11 @@ error::{Error, Result}, ffi::c_void, prelude::*, - revocable::Revocable, - sync::{Arc, Completion}, + revocable::{Revocable, RevocableGuard}, + sync::{rcu, Arc, Completion}, types::ARef, }; -use core::ops::Deref; - #[pin_data] struct DevresInner { dev: ARef, @@ -230,15 +228,22 @@ pub fn access<'a>(&'a self, dev: &'a Device) -> Result<&'a T> { // SAFETY: `dev` being the same device as the device this `Devres` has been created for // proves that `self.0.data` hasn't been revoked and is guaranteed to not be revoked as // long as `dev` lives; `dev` lives at least as long as `self`. - Ok(unsafe { self.deref().access() }) + Ok(unsafe { self.0.data.access() }) } -} -impl Deref for Devres { - type Target = Revocable; + /// [`Devres`] accessor for [`Revocable::try_access`]. + pub fn try_access(&self) -> Option> { + self.0.data.try_access() + } - fn deref(&self) -> &Self::Target { - &self.0.data + /// [`Devres`] accessor for [`Revocable::try_access_with`]. + pub fn try_access_with R>(&self, f: F) -> Option { + self.0.data.try_access_with(f) + } + + /// [`Devres`] accessor for [`Revocable::try_access_with_guard`]. + pub fn try_access_with_guard<'a>(&'a self, guard: &'a rcu::Guard) -> Option<&'a T> { + self.0.data.try_access_with_guard(guard) } } @@ -246,7 +251,7 @@ impl Drop for Devres { fn drop(&mut self) { // SAFETY: When `drop` runs, it is guaranteed that nobody is accessing the revocable data // anymore, hence it is safe not to wait for the grace period to finish. - if unsafe { self.revoke_nosync() } { + if unsafe { self.0.data.revoke_nosync() } { // We revoked `self.0.data` before the devres action did, hence try to remove it. if !DevresInner::remove_action(&self.0) { // We could not remove the devres action, which means that it now runs concurrently, From d3deabe4c619875714b9a844b1a3d9752dbae1dd Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 13 Jun 2025 07:41:44 -0700 Subject: [PATCH 299/497] drm/msm: Fix inverted WARN_ON() logic We want to WARN_ON() if info is NULL. Suggested-by: Konrad Dybcio Fixes: 0838fc3e6718 ("drm/msm/adreno: Check for recognized GPU before bind") Tested-by: Neil Armstrong Signed-off-by: Rob Clark Reported-by: Alexey Klimov Reviewed-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/658631/ --- drivers/gpu/drm/msm/adreno/adreno_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 5e7307567239..16e7ac444efd 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -221,7 +221,7 @@ static int adreno_bind(struct device *dev, struct device *master, void *data) info = adreno_info(config.chip_id); /* We shouldn't have gotten this far if we don't recognize the GPU: */ - if (!WARN_ON(info)) + if (WARN_ON(!info)) return -ENXIO; config.info = info; From 1d27f11bf02b38c431e49a17dee5c10a2b4c2e28 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 15 Jun 2025 08:09:14 -0600 Subject: [PATCH 300/497] io_uring/rsrc: validate buffer count with offset for cloning syzbot reports that it can trigger a WARN_ON() for kmalloc() attempt that's too big: WARNING: CPU: 0 PID: 6488 at mm/slub.c:5024 __kvmalloc_node_noprof+0x520/0x640 mm/slub.c:5024 Modules linked in: CPU: 0 UID: 0 PID: 6488 Comm: syz-executor312 Not tainted 6.15.0-rc7-syzkaller-gd7fa1af5b33e #0 PREEMPT Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __kvmalloc_node_noprof+0x520/0x640 mm/slub.c:5024 lr : __do_kmalloc_node mm/slub.c:-1 [inline] lr : __kvmalloc_node_noprof+0x3b4/0x640 mm/slub.c:5012 sp : ffff80009cfd7a90 x29: ffff80009cfd7ac0 x28: ffff0000dd52a120 x27: 0000000000412dc0 x26: 0000000000000178 x25: ffff7000139faf70 x24: 0000000000000000 x23: ffff800082f4cea8 x22: 00000000ffffffff x21: 000000010cd004a8 x20: ffff0000d75816c0 x19: ffff0000dd52a000 x18: 00000000ffffffff x17: ffff800092f39000 x16: ffff80008adbe9e4 x15: 0000000000000005 x14: 1ffff000139faf1c x13: 0000000000000000 x12: 0000000000000000 x11: ffff7000139faf21 x10: 0000000000000003 x9 : ffff80008f27b938 x8 : 0000000000000002 x7 : 0000000000000000 x6 : 0000000000000000 x5 : 00000000ffffffff x4 : 0000000000400dc0 x3 : 0000000200000000 x2 : 000000010cd004a8 x1 : ffff80008b3ebc40 x0 : 0000000000000001 Call trace: __kvmalloc_node_noprof+0x520/0x640 mm/slub.c:5024 (P) kvmalloc_array_node_noprof include/linux/slab.h:1065 [inline] io_rsrc_data_alloc io_uring/rsrc.c:206 [inline] io_clone_buffers io_uring/rsrc.c:1178 [inline] io_register_clone_buffers+0x484/0xa14 io_uring/rsrc.c:1287 __io_uring_register io_uring/register.c:815 [inline] __do_sys_io_uring_register io_uring/register.c:926 [inline] __se_sys_io_uring_register io_uring/register.c:903 [inline] __arm64_sys_io_uring_register+0x42c/0xea8 io_uring/register.c:903 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 which is due to offset + buffer_count being too large. The registration code checks only the total count of buffers, but given that the indexing is an array, it should also check offset + count. That can't exceed IORING_MAX_REG_BUFFERS either, as there's no way to reach buffers beyond that limit. There's no issue with registrering a table this large, outside of the fact that it's pointless to register buffers that cannot be reached, and that it can trigger this kmalloc() warning for attempting an allocation that is too large. Cc: stable@vger.kernel.org Fixes: b16e920a1909 ("io_uring/rsrc: allow cloning at an offset") Reported-by: syzbot+cb4bf3cb653be0d25de8@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/684e77bd.a00a0220.279073.0029.GAE@google.com/ Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index c592ceace97d..94a9db030e0e 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1177,6 +1177,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx return -EINVAL; if (check_add_overflow(arg->nr, arg->dst_off, &nbufs)) return -EOVERFLOW; + if (nbufs > IORING_MAX_REG_BUFFERS) + return -EINVAL; ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr)); if (ret) From a6a7946bd691940cfe7289ae6dfb1f077516df72 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 13 Jun 2025 01:08:48 +0900 Subject: [PATCH 301/497] kbuild: move warnings about linux/export.h from W=1 to W=2 This hides excessive warnings, as nobody builds with W=2. Fixes: a934a57a42f6 ("scripts/misc-check: check missing #include when W=1") Fixes: 7d95680d64ac ("scripts/misc-check: check unnecessary #include when W=1") Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Acked-by: Heiko Carstens --- Makefile | 3 --- scripts/misc-check | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 35e6e5240c61..69c534982415 100644 --- a/Makefile +++ b/Makefile @@ -1832,12 +1832,9 @@ rustfmtcheck: rustfmt # Misc # --------------------------------------------------------------------------- -# Run misc checks when ${KBUILD_EXTRA_WARN} contains 1 PHONY += misc-check -ifneq ($(findstring 1,$(KBUILD_EXTRA_WARN)),) misc-check: $(Q)$(srctree)/scripts/misc-check -endif all: misc-check diff --git a/scripts/misc-check b/scripts/misc-check index a74450e799d1..84f08da17b2c 100755 --- a/scripts/misc-check +++ b/scripts/misc-check @@ -62,6 +62,15 @@ check_unnecessary_include_linux_export_h () { xargs -r printf "%s: warning: EXPORT_SYMBOL() is not used, but #include is present\n" >&2 } -check_tracked_ignored_files -check_missing_include_linux_export_h -check_unnecessary_include_linux_export_h +case "${KBUILD_EXTRA_WARN}" in +*1*) + check_tracked_ignored_files + ;; +esac + +case "${KBUILD_EXTRA_WARN}" in +*2*) + check_missing_include_linux_export_h + check_unnecessary_include_linux_export_h + ;; +esac From 2f6b47b295518c3ba16fabb1dddbe6a319899acb Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Sat, 14 Jun 2025 00:55:33 +0000 Subject: [PATCH 302/497] gendwarfksyms: Fix structure type overrides As we always iterate through the entire die_map when expanding type strings, recursively processing referenced types in type_expand_child() is not actually necessary. Furthermore, the type_string kABI rule added in commit c9083467f7b9 ("gendwarfksyms: Add a kABI rule to override type strings") can fail to override type strings for structures due to a missing kabi_get_type_string() check in this function. Fix the issue by dropping the unnecessary recursion and moving the override check to type_expand(). Note that symbol versions are otherwise unchanged with this patch. Fixes: c9083467f7b9 ("gendwarfksyms: Add a kABI rule to override type strings") Reported-by: Giuliano Procida Signed-off-by: Sami Tolvanen Reviewed-by: Petr Pavlu Signed-off-by: Masahiro Yamada --- scripts/gendwarfksyms/gendwarfksyms.h | 14 +----- scripts/gendwarfksyms/types.c | 65 ++++++++------------------- 2 files changed, 21 insertions(+), 58 deletions(-) diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 7dd03ffe0c5c..d9c06d2cb1df 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -216,24 +216,14 @@ int cache_get(struct cache *cache, unsigned long key); void cache_init(struct cache *cache); void cache_free(struct cache *cache); -static inline void __cache_mark_expanded(struct cache *cache, uintptr_t addr) -{ - cache_set(cache, addr, 1); -} - -static inline bool __cache_was_expanded(struct cache *cache, uintptr_t addr) -{ - return cache_get(cache, addr) == 1; -} - static inline void cache_mark_expanded(struct cache *cache, void *addr) { - __cache_mark_expanded(cache, (uintptr_t)addr); + cache_set(cache, (unsigned long)addr, 1); } static inline bool cache_was_expanded(struct cache *cache, void *addr) { - return __cache_was_expanded(cache, (uintptr_t)addr); + return cache_get(cache, (unsigned long)addr) == 1; } /* diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c index 39ce1770e463..7bd459ea6c59 100644 --- a/scripts/gendwarfksyms/types.c +++ b/scripts/gendwarfksyms/types.c @@ -333,37 +333,11 @@ static void calculate_version(struct version *version, cache_free(&expansion_cache); } -static void __type_expand(struct die *cache, struct type_expansion *type, - bool recursive); - -static void type_expand_child(struct die *cache, struct type_expansion *type, - bool recursive) -{ - struct type_expansion child; - char *name; - - name = get_type_name(cache); - if (!name) { - __type_expand(cache, type, recursive); - return; - } - - if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { - __cache_mark_expanded(&expansion_cache, cache->addr); - type_expansion_init(&child); - __type_expand(cache, &child, true); - type_map_add(name, &child); - type_expansion_free(&child); - } - - type_expansion_append(type, name, name); -} - -static void __type_expand(struct die *cache, struct type_expansion *type, - bool recursive) +static void __type_expand(struct die *cache, struct type_expansion *type) { struct die_fragment *df; struct die *child; + char *name; list_for_each_entry(df, &cache->fragments, list) { switch (df->type) { @@ -379,7 +353,12 @@ static void __type_expand(struct die *cache, struct type_expansion *type, error("unknown child: %" PRIxPTR, df->data.addr); - type_expand_child(child, type, recursive); + name = get_type_name(child); + if (name) + type_expansion_append(type, name, name); + else + __type_expand(child, type); + break; case FRAGMENT_LINEBREAK: /* @@ -397,12 +376,17 @@ static void __type_expand(struct die *cache, struct type_expansion *type, } } -static void type_expand(struct die *cache, struct type_expansion *type, - bool recursive) +static void type_expand(const char *name, struct die *cache, + struct type_expansion *type) { + const char *override; + type_expansion_init(type); - __type_expand(cache, type, recursive); - cache_free(&expansion_cache); + + if (stable && kabi_get_type_string(name, &override)) + type_parse(name, override, type); + else + __type_expand(cache, type); } static void type_parse(const char *name, const char *str, @@ -416,8 +400,6 @@ static void type_parse(const char *name, const char *str, if (!*str) error("empty type string override for '%s'", name); - type_expansion_init(type); - for (pos = 0; str[pos]; ++pos) { bool empty; char marker = ' '; @@ -478,7 +460,6 @@ static void type_parse(const char *name, const char *str, static void expand_type(struct die *cache, void *arg) { struct type_expansion type; - const char *override; char *name; if (cache->mapped) @@ -504,11 +485,7 @@ static void expand_type(struct die *cache, void *arg) debug("%s", name); - if (stable && kabi_get_type_string(name, &override)) - type_parse(name, override, &type); - else - type_expand(cache, &type, true); - + type_expand(name, cache, &type); type_map_add(name, &type); type_expansion_free(&type); free(name); @@ -518,7 +495,6 @@ static void expand_symbol(struct symbol *sym, void *arg) { struct type_expansion type; struct version version; - const char *override; struct die *cache; /* @@ -532,10 +508,7 @@ static void expand_symbol(struct symbol *sym, void *arg) if (__die_map_get(sym->die_addr, DIE_SYMBOL, &cache)) return; /* We'll warn about missing CRCs later. */ - if (stable && kabi_get_type_string(sym->name, &override)) - type_parse(sym->name, override, &type); - else - type_expand(cache, &type, false); + type_expand(sym->name, cache, &type); /* If the symbol already has a version, don't calculate it again. */ if (sym->state != SYMBOL_PROCESSED) { From 89465d923bda180299e69ee2800aab84ad0ba689 Mon Sep 17 00:00:00 2001 From: Penglei Jiang Date: Sun, 15 Jun 2025 09:39:06 -0700 Subject: [PATCH 303/497] io_uring: fix task leak issue in io_wq_create() Add missing put_task_struct() in the error path Cc: stable@vger.kernel.org Fixes: 0f8baa3c9802 ("io-wq: fully initialize wqe before calling cpuhp_state_add_instance_nocalls()") Signed-off-by: Penglei Jiang Link: https://lore.kernel.org/r/20250615163906.2367-1-superman.xpt@gmail.com Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index cd1fcb115739..be91edf34f01 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1259,8 +1259,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) atomic_set(&wq->worker_refs, 1); init_completion(&wq->worker_done); ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); - if (ret) + if (ret) { + put_task_struct(wq->task); goto err; + } return wq; err: From e04c78d86a9699d136910cfc0bdcf01087e3267e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Jun 2025 13:49:41 -0700 Subject: [PATCH 304/497] Linux 6.16-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 69c534982415..ba0827a1fccd 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* From e202196b8aa249d78ab87eae56bbe0e71e3dc39c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 29 May 2025 10:31:17 -0700 Subject: [PATCH 305/497] lib/crypto: Annotate crypto strings with nonstring Annotate various keys, ivs, and other byte arrays with __nonstring so that static initializers will not complain about truncating the trailing NUL byte under GCC 15 with -Wunterminated-string-initialization enabled. Silences many warnings like: ../lib/crypto/aesgcm.c:642:27: warning: initializer-string for array of 'unsigned char' truncates NUL terminator but destination lacks 'nonstring' attribute (13 chars into 12 available) [-Wunterminated-string-initialization] 642 | .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20250529173113.work.760-kees@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/aescfb.c | 8 ++++---- lib/crypto/aesgcm.c | 46 ++++++++++++++++++++++----------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/lib/crypto/aescfb.c b/lib/crypto/aescfb.c index 437613265e14..2f09ae92ffa0 100644 --- a/lib/crypto/aescfb.c +++ b/lib/crypto/aescfb.c @@ -106,11 +106,11 @@ MODULE_LICENSE("GPL"); */ static struct { - u8 ptext[64]; - u8 ctext[64]; + u8 ptext[64] __nonstring; + u8 ctext[64] __nonstring; - u8 key[AES_MAX_KEY_SIZE]; - u8 iv[AES_BLOCK_SIZE]; + u8 key[AES_MAX_KEY_SIZE] __nonstring; + u8 iv[AES_BLOCK_SIZE] __nonstring; int klen; int len; diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c index 277824d6b4af..faa4dee9bb1b 100644 --- a/lib/crypto/aesgcm.c +++ b/lib/crypto/aesgcm.c @@ -205,19 +205,19 @@ MODULE_LICENSE("GPL"); * Test code below. Vectors taken from crypto/testmgr.h */ -static const u8 __initconst ctext0[16] = +static const u8 __initconst ctext0[16] __nonstring = "\x58\xe2\xfc\xce\xfa\x7e\x30\x61" "\x36\x7f\x1d\x57\xa4\xe7\x45\x5a"; static const u8 __initconst ptext1[16]; -static const u8 __initconst ctext1[32] = +static const u8 __initconst ctext1[32] __nonstring = "\x03\x88\xda\xce\x60\xb6\xa3\x92" "\xf3\x28\xc2\xb9\x71\xb2\xfe\x78" "\xab\x6e\x47\xd4\x2c\xec\x13\xbd" "\xf5\x3a\x67\xb2\x12\x57\xbd\xdf"; -static const u8 __initconst ptext2[64] = +static const u8 __initconst ptext2[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -227,7 +227,7 @@ static const u8 __initconst ptext2[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext2[80] = +static const u8 __initconst ctext2[80] __nonstring = "\x42\x83\x1e\xc2\x21\x77\x74\x24" "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" @@ -239,7 +239,7 @@ static const u8 __initconst ctext2[80] = "\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6" "\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4"; -static const u8 __initconst ptext3[60] = +static const u8 __initconst ptext3[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -249,7 +249,7 @@ static const u8 __initconst ptext3[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext3[76] = +static const u8 __initconst ctext3[76] __nonstring = "\x42\x83\x1e\xc2\x21\x77\x74\x24" "\x4b\x72\x21\xb7\x84\xd0\xd4\x9c" "\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0" @@ -261,17 +261,17 @@ static const u8 __initconst ctext3[76] = "\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb" "\x94\xfa\xe9\x5a\xe7\x12\x1a\x47"; -static const u8 __initconst ctext4[16] = +static const u8 __initconst ctext4[16] __nonstring = "\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b" "\xa0\x0e\xd1\xf3\x12\x57\x24\x35"; -static const u8 __initconst ctext5[32] = +static const u8 __initconst ctext5[32] __nonstring = "\x98\xe7\x24\x7c\x07\xf0\xfe\x41" "\x1c\x26\x7e\x43\x84\xb0\xf6\x00" "\x2f\xf5\x8d\x80\x03\x39\x27\xab" "\x8e\xf4\xd4\x58\x75\x14\xf0\xfb"; -static const u8 __initconst ptext6[64] = +static const u8 __initconst ptext6[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -281,7 +281,7 @@ static const u8 __initconst ptext6[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext6[80] = +static const u8 __initconst ctext6[80] __nonstring = "\x39\x80\xca\x0b\x3c\x00\xe8\x41" "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" @@ -293,17 +293,17 @@ static const u8 __initconst ctext6[80] = "\x99\x24\xa7\xc8\x58\x73\x36\xbf" "\xb1\x18\x02\x4d\xb8\x67\x4a\x14"; -static const u8 __initconst ctext7[16] = +static const u8 __initconst ctext7[16] __nonstring = "\x53\x0f\x8a\xfb\xc7\x45\x36\xb9" "\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b"; -static const u8 __initconst ctext8[32] = +static const u8 __initconst ctext8[32] __nonstring = "\xce\xa7\x40\x3d\x4d\x60\x6b\x6e" "\x07\x4e\xc5\xd3\xba\xf3\x9d\x18" "\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0" "\x26\x5b\x98\xb5\xd4\x8a\xb9\x19"; -static const u8 __initconst ptext9[64] = +static const u8 __initconst ptext9[64] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -313,7 +313,7 @@ static const u8 __initconst ptext9[64] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39\x1a\xaf\xd2\x55"; -static const u8 __initconst ctext9[80] = +static const u8 __initconst ctext9[80] __nonstring = "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" @@ -325,7 +325,7 @@ static const u8 __initconst ctext9[80] = "\xb0\x94\xda\xc5\xd9\x34\x71\xbd" "\xec\x1a\x50\x22\x70\xe3\xcc\x6c"; -static const u8 __initconst ptext10[60] = +static const u8 __initconst ptext10[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -335,7 +335,7 @@ static const u8 __initconst ptext10[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext10[76] = +static const u8 __initconst ctext10[76] __nonstring = "\x52\x2d\xc1\xf0\x99\x56\x7d\x07" "\xf4\x7f\x37\xa3\x2a\x84\x42\x7d" "\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9" @@ -347,7 +347,7 @@ static const u8 __initconst ctext10[76] = "\x76\xfc\x6e\xce\x0f\x4e\x17\x68" "\xcd\xdf\x88\x53\xbb\x2d\x55\x1b"; -static const u8 __initconst ptext11[60] = +static const u8 __initconst ptext11[60] __nonstring = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" "\x86\xa7\xa9\x53\x15\x34\xf7\xda" @@ -357,7 +357,7 @@ static const u8 __initconst ptext11[60] = "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" "\xba\x63\x7b\x39"; -static const u8 __initconst ctext11[76] = +static const u8 __initconst ctext11[76] __nonstring = "\x39\x80\xca\x0b\x3c\x00\xe8\x41" "\xeb\x06\xfa\xc4\x87\x2a\x27\x57" "\x85\x9e\x1c\xea\xa6\xef\xd9\x84" @@ -369,7 +369,7 @@ static const u8 __initconst ctext11[76] = "\x25\x19\x49\x8e\x80\xf1\x47\x8f" "\x37\xba\x55\xbd\x6d\x27\x61\x8c"; -static const u8 __initconst ptext12[719] = +static const u8 __initconst ptext12[719] __nonstring = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" @@ -461,7 +461,7 @@ static const u8 __initconst ptext12[719] = "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" "\xa4\x78\xdb\x74\x3d\x8b\xb5"; -static const u8 __initconst ctext12[735] = +static const u8 __initconst ctext12[735] __nonstring = "\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20" "\xbb\xb1\x12\x7f\x41\xea\xb3\xc0" "\xa2\xb4\x37\x19\x11\x58\xb6\x0b" @@ -559,9 +559,9 @@ static struct { const u8 *ptext; const u8 *ctext; - u8 key[AES_MAX_KEY_SIZE]; - u8 iv[GCM_AES_IV_SIZE]; - u8 assoc[20]; + u8 key[AES_MAX_KEY_SIZE] __nonstring; + u8 iv[GCM_AES_IV_SIZE] __nonstring; + u8 assoc[20] __nonstring; int klen; int clen; From 2f13daee2a72bb962f5fd356c3a263a6f16da965 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 9 Jun 2025 15:45:20 -0700 Subject: [PATCH 306/497] lib/crypto/curve25519-hacl64: Disable KASAN with clang-17 and older After commit 6f110a5e4f99 ("Disable SLUB_TINY for build testing"), which causes CONFIG_KASAN to be enabled in allmodconfig again, arm64 allmodconfig builds with clang-17 and older show an instance of -Wframe-larger-than (which breaks the build with CONFIG_WERROR=y): lib/crypto/curve25519-hacl64.c:757:6: error: stack frame size (2336) exceeds limit (2048) in 'curve25519_generic' [-Werror,-Wframe-larger-than] 757 | void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], | ^ When KASAN is disabled, the stack usage is roughly quartered: lib/crypto/curve25519-hacl64.c:757:6: error: stack frame size (608) exceeds limit (128) in 'curve25519_generic' [-Werror,-Wframe-larger-than] 757 | void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], | ^ Using '-Rpass-analysis=stack-frame-layout' shows the following variables and many, many 8-byte spills when KASAN is enabled: Offset: [SP-144], Type: Variable, Align: 8, Size: 40 Offset: [SP-464], Type: Variable, Align: 8, Size: 320 Offset: [SP-784], Type: Variable, Align: 8, Size: 320 Offset: [SP-864], Type: Variable, Align: 32, Size: 80 Offset: [SP-896], Type: Variable, Align: 32, Size: 32 Offset: [SP-1016], Type: Variable, Align: 8, Size: 120 When KASAN is disabled, there are still spills but not at many and the variables list is smaller: Offset: [SP-192], Type: Variable, Align: 32, Size: 80 Offset: [SP-224], Type: Variable, Align: 32, Size: 32 Offset: [SP-344], Type: Variable, Align: 8, Size: 120 Disable KASAN for this file when using clang-17 or older to avoid blowing out the stack, clearing up the warning. Signed-off-by: Nathan Chancellor Acked-by: "Jason A. Donenfeld" Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250609-curve25519-hacl64-disable-kasan-clang-v1-1-08ea0ac5ccff@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 3e79283b617d..18664127ecd6 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -35,6 +35,10 @@ obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o libcurve25519-generic-y := curve25519-fiat32.o libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o libcurve25519-generic-y += curve25519-generic.o +# clang versions prior to 18 may blow out the stack with KASAN +ifeq ($(call clang-min-version, 180000),) +KASAN_SANITIZE_curve25519-hacl64.o := n +endif obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o libcurve25519-y += curve25519.o From a7137b1825b535eb7258b25beeb0d5425e0037d2 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Thu, 12 Jun 2025 08:30:23 +0000 Subject: [PATCH 307/497] drm/i915/pmu: Fix build error with GCOV and AutoFDO enabled i915_pmu.c may fail to build with GCOV and AutoFDO enabled. ../drivers/gpu/drm/i915/i915_pmu.c:116:3: error: call to '__compiletime_assert_487' declared with 'error' attribute: BUILD_BUG_ON failed: bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1 116 | BUILD_BUG_ON(bit > | ^ Here is a way to reproduce the issue: $ git checkout v6.15 $ mkdir build $ ./scripts/kconfig/merge_config.sh -O build -n -m <(cat < Signed-off-by: Tzung-Bi Shih Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250612083023.562585-1-tzungbi@kernel.org (cherry picked from commit 686d773186bf72b739bab7e12eb8665d914676ee) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index e5a188ce3185..990bfaba3ce4 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -112,7 +112,7 @@ static u32 config_mask(const u64 config) { unsigned int bit = config_bit(config); - if (__builtin_constant_p(config)) + if (__builtin_constant_p(bit)) BUILD_BUG_ON(bit > BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); @@ -121,7 +121,7 @@ static u32 config_mask(const u64 config) BITS_PER_TYPE(typeof_member(struct i915_pmu, enable)) - 1); - return BIT(config_bit(config)); + return BIT(bit); } static bool is_engine_event(struct perf_event *event) From c464ce6af332e7c802c36cd337cacf81db05400c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 14 Mar 2025 17:01:34 +0200 Subject: [PATCH 308/497] drm/i915/dsi: Fix off by one in BXT_MIPI_TRANS_VTOTAL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BXT_MIPI_TRANS_VTOTAL must be programmed with vtotal-1 instead of vtotal. Make it so. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250314150136.22564-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 7b3685c9b38c3097f465efec8b24dbed63258cf6) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/vlv_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c index 346737f15fa9..21c1e10caf68 100644 --- a/drivers/gpu/drm/i915/display/vlv_dsi.c +++ b/drivers/gpu/drm/i915/display/vlv_dsi.c @@ -1056,7 +1056,7 @@ static void bxt_dsi_get_pipe_config(struct intel_encoder *encoder, BXT_MIPI_TRANS_VACTIVE(port)); adjusted_mode->crtc_vtotal = intel_de_read(display, - BXT_MIPI_TRANS_VTOTAL(port)); + BXT_MIPI_TRANS_VTOTAL(port)) + 1; hactive = adjusted_mode->crtc_hdisplay; hfp = intel_de_read(display, MIPI_HFP_COUNT(display, port)); @@ -1260,7 +1260,7 @@ static void set_dsi_timings(struct intel_encoder *encoder, intel_de_write(display, BXT_MIPI_TRANS_VACTIVE(port), adjusted_mode->crtc_vdisplay); intel_de_write(display, BXT_MIPI_TRANS_VTOTAL(port), - adjusted_mode->crtc_vtotal); + adjusted_mode->crtc_vtotal - 1); } intel_de_write(display, MIPI_HACTIVE_AREA_COUNT(display, port), From e6382fcf989074566bb9a54bbd3c514d7bb99397 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Fri, 13 Jun 2025 17:25:33 +0800 Subject: [PATCH 309/497] gpio: spacemit: Add missing MODULE_DEVICE_TABLE The gpio-spacemit-k1 driver can be compiled as a module. Add missing MODULE_DEVICE_TABLE so it can be matched by modalias and automatically loaded by udev. Fixes: d00553240ef8 ("gpio: spacemit: add support for K1 SoC") Signed-off-by: Vivian Wang Reviewed-by: Yixun Lan Link: https://lore.kernel.org/r/20250613-k1-gpio-of-table-v1-1-9015da8fdfdb@iscas.ac.cn Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-spacemit-k1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-spacemit-k1.c b/drivers/gpio/gpio-spacemit-k1.c index f027066365ff..3cc75c701ec4 100644 --- a/drivers/gpio/gpio-spacemit-k1.c +++ b/drivers/gpio/gpio-spacemit-k1.c @@ -278,6 +278,7 @@ static const struct of_device_id spacemit_gpio_dt_ids[] = { { .compatible = "spacemit,k1-gpio" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, spacemit_gpio_dt_ids); static struct platform_driver spacemit_gpio_driver = { .probe = spacemit_gpio_probe, From dd2d6b7f6f519d078a866a36a625b0297d81c5bc Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Fri, 13 Jun 2025 11:11:11 +0100 Subject: [PATCH 310/497] fs: drop assert in file_seek_cur_needs_f_lock The assert in function file_seek_cur_needs_f_lock() can be triggered very easily because there are many users of vfs_llseek() (such as overlayfs) that do their custom locking around llseek instead of relying on fdget_pos(). Just drop the overzealous assertion. Fixes: da06e3c51794 ("fs: don't needlessly acquire f_lock") Suggested-by: Jan Kara Suggested-by: Mateusz Guzik Signed-off-by: Luis Henriques Link: https://lore.kernel.org/20250613101111.17716-1-luis@igalia.com Signed-off-by: Christian Brauner --- fs/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index 3a3146664cf3..b6db031545e6 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1198,8 +1198,12 @@ bool file_seek_cur_needs_f_lock(struct file *file) if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared) return false; - VFS_WARN_ON_ONCE((file_count(file) > 1) && - !mutex_is_locked(&file->f_pos_lock)); + /* + * Note that we are not guaranteed to be called after fdget_pos() on + * this file obj, in which case the caller is expected to provide the + * appropriate locking. + */ + return true; } From b8b8663ac82a2595a0922d30014f60c5547084de Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Sat, 14 Jun 2025 11:20:23 +0200 Subject: [PATCH 311/497] mailmap: add entry for Danilo Krummrich Add an entry to remap my Red Hat address. Link: https://lore.kernel.org/r/20250614092054.161658-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index c8a21d72241f..8b0aac19efcd 100644 --- a/.mailmap +++ b/.mailmap @@ -197,6 +197,7 @@ Daniel Borkmann Daniel Borkmann Daniel Borkmann Daniel Borkmann +Danilo Krummrich David Brownell David Collins David Heidelberg From 8acfb165a492251a08a22a4fa6497a131e8c2609 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 6 Jun 2025 21:04:18 +0200 Subject: [PATCH 312/497] regulator: fan53555: add enable_time support and soft-start times The datasheets for all the fan53555 variants (and clones using the same interface) define so called soft start times, from enabling the regulator until at least some percentage of the output (i.e. 92% for the rk860x types) are available. The regulator framework supports this with the enable_time property but currently the fan53555 driver does not define enable_times for any variant. I ran into a problem with this while testing the new driver for the Rockchip NPUs (rocket), which does runtime-pm including disabling and enabling a rk8602 as needed. When reenabling the regulator while running a load, fatal hangs could be observed while enabling the associated power-domain, which the regulator supplies. Experimentally setting the regulator to always-on, made the issue disappear, leading to the missing delay to let power stabilize. And as expected, setting the enable-time to a non-zero value according to the datasheet also resolved the regulator-issue. The datasheets in nearly all cases only specify "typical" values, except for the fan53555 type 08. There both a typical and maximum value are listed - 40uS apart. For all typical values I've added 100uS to be on the safe side. Individual details for the relevant regulators below: - fan53526: The datasheet for all variants lists a typical value of 150uS, so make that 250uS with safety margin. - fan53555: types 08 and 18 (unsupported) are given a typical enable time of 135uS but also a maximum of 175uS so use that value. All the other types only have a typical time in the datasheet of 300uS, so give a bit margin by setting it to 400uS. - rk8600 + rk8602: Datasheet reports a typical value of 260us, so use 360uS to be safe. - syr82x + syr83x: All datasheets report typical soft-start values of 300uS for these regulators, so use 400uS. - tcs452x: Datasheet sadly does not report a soft-start time, so I've not set an enable-time Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20250606190418.478633-1-heiko@sntech.de Signed-off-by: Mark Brown --- drivers/regulator/fan53555.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index bd9447dac596..c282236959b1 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -147,6 +147,7 @@ struct fan53555_device_info { unsigned int slew_mask; const unsigned int *ramp_delay_table; unsigned int n_ramp_values; + unsigned int enable_time; unsigned int slew_rate; }; @@ -282,6 +283,7 @@ static int fan53526_voltages_setup_fairchild(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 250; di->vsel_count = FAN53526_NVOLTAGES; return 0; @@ -296,10 +298,12 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di) case FAN53555_CHIP_REV_00: di->vsel_min = 600000; di->vsel_step = 10000; + di->enable_time = 400; break; case FAN53555_CHIP_REV_13: di->vsel_min = 800000; di->vsel_step = 10000; + di->enable_time = 400; break; default: dev_err(di->dev, @@ -311,13 +315,19 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di) case FAN53555_CHIP_ID_01: case FAN53555_CHIP_ID_03: case FAN53555_CHIP_ID_05: + di->vsel_min = 600000; + di->vsel_step = 10000; + di->enable_time = 400; + break; case FAN53555_CHIP_ID_08: di->vsel_min = 600000; di->vsel_step = 10000; + di->enable_time = 175; break; case FAN53555_CHIP_ID_04: di->vsel_min = 603000; di->vsel_step = 12826; + di->enable_time = 400; break; default: dev_err(di->dev, @@ -350,6 +360,7 @@ static int fan53555_voltages_setup_rockchip(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 360; di->vsel_count = FAN53555_NVOLTAGES; return 0; @@ -372,6 +383,7 @@ static int rk8602_voltages_setup_rockchip(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 360; di->vsel_count = RK8602_NVOLTAGES; return 0; @@ -395,6 +407,7 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di) di->slew_mask = CTL_SLEW_MASK; di->ramp_delay_table = slew_rates; di->n_ramp_values = ARRAY_SIZE(slew_rates); + di->enable_time = 400; di->vsel_count = FAN53555_NVOLTAGES; return 0; @@ -594,6 +607,7 @@ static int fan53555_regulator_register(struct fan53555_device_info *di, rdesc->ramp_mask = di->slew_mask; rdesc->ramp_delay_table = di->ramp_delay_table; rdesc->n_ramp_values = di->n_ramp_values; + rdesc->enable_time = di->enable_time; rdesc->owner = THIS_MODULE; rdev = devm_regulator_register(di->dev, &di->desc, config); From 14c9ede9ca4cd078ad76a6ab9617b81074eb58bf Mon Sep 17 00:00:00 2001 From: Gui-Dong Han Date: Fri, 6 Jun 2025 07:16:40 +0000 Subject: [PATCH 313/497] hwmon: (ftsteutates) Fix TOCTOU race in fts_read() In the fts_read() function, when handling hwmon_pwm_auto_channels_temp, the code accesses the shared variable data->fan_source[channel] twice without holding any locks. It is first checked against FTS_FAN_SOURCE_INVALID, and if the check passes, it is read again when used as an argument to the BIT() macro. This creates a Time-of-Check to Time-of-Use (TOCTOU) race condition. Another thread executing fts_update_device() can modify the value of data->fan_source[channel] between the check and its use. If the value is changed to FTS_FAN_SOURCE_INVALID (0xff) during this window, the BIT() macro will be called with a large shift value (BIT(255)). A bit shift by a value greater than or equal to the type width is undefined behavior and can lead to a crash or incorrect values being returned to userspace. Fix this by reading data->fan_source[channel] into a local variable once, eliminating the race condition. Additionally, add a bounds check to ensure the value is less than BITS_PER_LONG before passing it to the BIT() macro, making the code more robust against undefined behavior. This possible bug was found by an experimental static analysis tool developed by our team. Fixes: 1c5759d8ce05 ("hwmon: (ftsteutates) Replace fanX_source with pwmX_auto_channels_temp") Cc: stable@vger.kernel.org Signed-off-by: Gui-Dong Han Link: https://lore.kernel.org/r/20250606071640.501262-1-hanguidong02@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ftsteutates.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/ftsteutates.c b/drivers/hwmon/ftsteutates.c index a3a07662e491..8aeec16a7a90 100644 --- a/drivers/hwmon/ftsteutates.c +++ b/drivers/hwmon/ftsteutates.c @@ -423,13 +423,16 @@ static int fts_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, break; case hwmon_pwm: switch (attr) { - case hwmon_pwm_auto_channels_temp: - if (data->fan_source[channel] == FTS_FAN_SOURCE_INVALID) + case hwmon_pwm_auto_channels_temp: { + u8 fan_source = data->fan_source[channel]; + + if (fan_source == FTS_FAN_SOURCE_INVALID || fan_source >= BITS_PER_LONG) *val = 0; else - *val = BIT(data->fan_source[channel]); + *val = BIT(fan_source); return 0; + } default: break; } From 744c2fe950e936c4d62430de899d6253424200ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:23:06 +0200 Subject: [PATCH 314/497] hwmon: (occ) Rework attribute registration for stack usage clang produces an output with excessive stack usage when building the occ_setup_sensor_attrs() function, apparently the result of having a lot of struct literals and building with the -fno-strict-overflow option that leads clang to skip some optimization in case the 'attr' pointer overruns: drivers/hwmon/occ/common.c:775:12: error: stack frame size (1392) exceeds limit (1280) in 'occ_setup_sensor_attrs' [-Werror,-Wframe-larger-than] Replace the custom macros for initializing the attributes with a simpler function call that does not run into this corner case. Link: https://godbolt.org/z/Wf1Yx76a5 Fixes: 54076cb3b5ff ("hwmon (occ): Add sensor attributes and register hwmon device") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250610092315.2640039-1-arnd@kernel.org Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.c | 210 +++++++++++++++---------------------- 1 file changed, 84 insertions(+), 126 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 9486db249c64..9029ad53790b 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -747,28 +747,29 @@ static ssize_t occ_show_extended(struct device *dev, } /* - * Some helper macros to make it easier to define an occ_attribute. Since these - * are dynamically allocated, we shouldn't use the existing kernel macros which + * A helper to make it easier to define an occ_attribute. Since these + * are dynamically allocated, we cannot use the existing kernel macros which * stringify the name argument. */ -#define ATTR_OCC(_name, _mode, _show, _store) { \ - .attr = { \ - .name = _name, \ - .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ - }, \ - .show = _show, \ - .store = _store, \ -} +static void occ_init_attribute(struct occ_attribute *attr, int mode, + ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf), + ssize_t (*store)(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count), + int nr, int index, const char *fmt, ...) +{ + va_list args; -#define SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index) { \ - .dev_attr = ATTR_OCC(_name, _mode, _show, _store), \ - .index = _index, \ - .nr = _nr, \ -} + va_start(args, fmt); + vsnprintf(attr->name, sizeof(attr->name), fmt, args); + va_end(args); -#define OCC_INIT_ATTR(_name, _mode, _show, _store, _nr, _index) \ - ((struct sensor_device_attribute_2) \ - SENSOR_ATTR_OCC(_name, _mode, _show, _store, _nr, _index)) + attr->sensor.dev_attr.attr.name = attr->name; + attr->sensor.dev_attr.attr.mode = mode; + attr->sensor.dev_attr.show = show; + attr->sensor.dev_attr.store = store; + attr->sensor.index = index; + attr->sensor.nr = nr; +} /* * Allocate and instatiate sensor_device_attribute_2s. It's most efficient to @@ -855,14 +856,15 @@ static int occ_setup_sensor_attrs(struct occ *occ) sensors->extended.num_sensors = 0; } - occ->attrs = devm_kzalloc(dev, sizeof(*occ->attrs) * num_attrs, + occ->attrs = devm_kcalloc(dev, num_attrs, sizeof(*occ->attrs), GFP_KERNEL); if (!occ->attrs) return -ENOMEM; /* null-terminated list */ - occ->group.attrs = devm_kzalloc(dev, sizeof(*occ->group.attrs) * - num_attrs + 1, GFP_KERNEL); + occ->group.attrs = devm_kcalloc(dev, num_attrs + 1, + sizeof(*occ->group.attrs), + GFP_KERNEL); if (!occ->group.attrs) return -ENOMEM; @@ -872,43 +874,33 @@ static int occ_setup_sensor_attrs(struct occ *occ) s = i + 1; temp = ((struct temp_sensor_2 *)sensors->temp.data) + i; - snprintf(attr->name, sizeof(attr->name), "temp%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL, - 0, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 0, i, "temp%d_label", s); attr++; if (sensors->temp.version == 2 && temp->fru_type == OCC_FRU_TYPE_VRM) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_alarm", s); + occ_init_attribute(attr, 0444, show_temp, NULL, + 1, i, "temp%d_alarm", s); } else { - snprintf(attr->name, sizeof(attr->name), - "temp%d_input", s); + occ_init_attribute(attr, 0444, show_temp, NULL, + 1, i, "temp%d_input", s); } - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_temp, NULL, - 1, i); attr++; if (sensors->temp.version > 1) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_fru_type", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, 2, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 2, i, "temp%d_fru_type", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "temp%d_fault", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, 3, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 3, i, "temp%d_fault", s); attr++; if (sensors->temp.version == 0x10) { - snprintf(attr->name, sizeof(attr->name), - "temp%d_max", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_temp, NULL, - 4, i); + occ_init_attribute(attr, 0444, show_temp, NULL, + 4, i, "temp%d_max", s); attr++; } } @@ -917,14 +909,12 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->freq.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), "freq%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL, - 0, i); + occ_init_attribute(attr, 0444, show_freq, NULL, + 0, i, "freq%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "freq%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_freq, NULL, - 1, i); + occ_init_attribute(attr, 0444, show_freq, NULL, + 1, i, "freq%d_input", s); attr++; } @@ -940,32 +930,24 @@ static int occ_setup_sensor_attrs(struct occ *occ) s = (i * 4) + 1; for (j = 0; j < 4; ++j) { - snprintf(attr->name, sizeof(attr->name), - "power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_average", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average_interval", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_average_interval", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, - nr++, i); + occ_init_attribute(attr, 0444, show_power, + NULL, nr++, i, + "power%d_input", s); attr++; s++; @@ -977,28 +959,20 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->power.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), - "power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 0, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 0, i, "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 1, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 1, i, "power%d_average", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_average_interval", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 2, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 2, i, "power%d_average_interval", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_power, NULL, 3, i); + occ_init_attribute(attr, 0444, show_power, NULL, + 3, i, "power%d_input", s); attr++; } @@ -1006,56 +980,43 @@ static int occ_setup_sensor_attrs(struct occ *occ) } if (sensors->caps.num_sensors >= 1) { - snprintf(attr->name, sizeof(attr->name), "power%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 0, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 0, 0, "power%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 1, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 1, 0, "power%d_cap", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 2, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 2, 0, "power%d_input", s); attr++; - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_not_redundant", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 3, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 3, 0, "power%d_cap_not_redundant", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_max", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 4, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 4, 0, "power%d_cap_max", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_min", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, show_caps, NULL, - 5, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 5, 0, "power%d_cap_min", s); attr++; - snprintf(attr->name, sizeof(attr->name), "power%d_cap_user", - s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0644, show_caps, - occ_store_caps_user, 6, 0); + occ_init_attribute(attr, 0644, show_caps, occ_store_caps_user, + 6, 0, "power%d_cap_user", s); attr++; if (sensors->caps.version > 1) { - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_user_source", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_caps, NULL, 7, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 7, 0, "power%d_cap_user_source", s); attr++; if (sensors->caps.version > 2) { - snprintf(attr->name, sizeof(attr->name), - "power%d_cap_min_soft", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - show_caps, NULL, - 8, 0); + occ_init_attribute(attr, 0444, show_caps, NULL, + 8, 0, + "power%d_cap_min_soft", s); attr++; } } @@ -1064,19 +1025,16 @@ static int occ_setup_sensor_attrs(struct occ *occ) for (i = 0; i < sensors->extended.num_sensors; ++i) { s = i + 1; - snprintf(attr->name, sizeof(attr->name), "extn%d_label", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 0, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 0, i, "extn%d_label", s); attr++; - snprintf(attr->name, sizeof(attr->name), "extn%d_flags", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 1, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 1, i, "extn%d_flags", s); attr++; - snprintf(attr->name, sizeof(attr->name), "extn%d_input", s); - attr->sensor = OCC_INIT_ATTR(attr->name, 0444, - occ_show_extended, NULL, 2, i); + occ_init_attribute(attr, 0444, occ_show_extended, NULL, + 2, i, "extn%d_input", s); attr++; } From 2c021b45c154958566aad0cae9f74ab26a2d5732 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:25:49 +0200 Subject: [PATCH 315/497] hwmon: (occ) fix unaligned accesses Passing a pointer to an unaligned integer as a function argument is undefined behavior: drivers/hwmon/occ/common.c:492:27: warning: taking address of packed member 'accumulator' of class or structure 'power_sensor_2' may result in an unaligned pointer value [-Waddress-of-packed-member] 492 | val = occ_get_powr_avg(&power->accumulator, | ^~~~~~~~~~~~~~~~~~ drivers/hwmon/occ/common.c:493:13: warning: taking address of packed member 'update_tag' of class or structure 'power_sensor_2' may result in an unaligned pointer value [-Waddress-of-packed-member] 493 | &power->update_tag); | ^~~~~~~~~~~~~~~~~ Move the get_unaligned() calls out of the function and pass these through argument registers instead. Fixes: c10e753d43eb ("hwmon (occ): Add sensor types and versions") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20250610092553.2641094-1-arnd@kernel.org Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index 9029ad53790b..b3694a4209b9 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -459,12 +459,10 @@ static ssize_t occ_show_power_1(struct device *dev, return sysfs_emit(buf, "%llu\n", val); } -static u64 occ_get_powr_avg(u64 *accum, u32 *samples) +static u64 occ_get_powr_avg(u64 accum, u32 samples) { - u64 divisor = get_unaligned_be32(samples); - - return (divisor == 0) ? 0 : - div64_u64(get_unaligned_be64(accum) * 1000000ULL, divisor); + return (samples == 0) ? 0 : + mul_u64_u32_div(accum, 1000000UL, samples); } static ssize_t occ_show_power_2(struct device *dev, @@ -489,8 +487,8 @@ static ssize_t occ_show_power_2(struct device *dev, get_unaligned_be32(&power->sensor_id), power->function_id, power->apss_channel); case 1: - val = occ_get_powr_avg(&power->accumulator, - &power->update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->accumulator), + get_unaligned_be32(&power->update_tag)); break; case 2: val = (u64)get_unaligned_be32(&power->update_tag) * @@ -527,8 +525,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_system\n", get_unaligned_be32(&power->sensor_id)); case 1: - val = occ_get_powr_avg(&power->system.accumulator, - &power->system.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->system.accumulator), + get_unaligned_be32(&power->system.update_tag)); break; case 2: val = (u64)get_unaligned_be32(&power->system.update_tag) * @@ -541,8 +539,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_proc\n", get_unaligned_be32(&power->sensor_id)); case 5: - val = occ_get_powr_avg(&power->proc.accumulator, - &power->proc.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->proc.accumulator), + get_unaligned_be32(&power->proc.update_tag)); break; case 6: val = (u64)get_unaligned_be32(&power->proc.update_tag) * @@ -555,8 +553,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_vdd\n", get_unaligned_be32(&power->sensor_id)); case 9: - val = occ_get_powr_avg(&power->vdd.accumulator, - &power->vdd.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->vdd.accumulator), + get_unaligned_be32(&power->vdd.update_tag)); break; case 10: val = (u64)get_unaligned_be32(&power->vdd.update_tag) * @@ -569,8 +567,8 @@ static ssize_t occ_show_power_a0(struct device *dev, return sysfs_emit(buf, "%u_vdn\n", get_unaligned_be32(&power->sensor_id)); case 13: - val = occ_get_powr_avg(&power->vdn.accumulator, - &power->vdn.update_tag); + val = occ_get_powr_avg(get_unaligned_be64(&power->vdn.accumulator), + get_unaligned_be32(&power->vdn.update_tag)); break; case 14: val = (u64)get_unaligned_be32(&power->vdn.update_tag) * From c25892b7a1744355e16281cd24a9b59ec15ec974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Wed, 11 Jun 2025 17:26:12 +0100 Subject: [PATCH 316/497] hwmon: (ltc4282) avoid repeated register write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fault enabled bits were being mistankenly enabled twice in case the FW property is present. Remove one of the writes. Fixes: cbc29538dbf7 ("hwmon: Add driver for LTC4282") Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20250611-fix-ltc4282-repetead-write-v1-1-fe46edd08cf1@analog.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc4282.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/hwmon/ltc4282.c b/drivers/hwmon/ltc4282.c index 7f38d2696239..f607fe8f7937 100644 --- a/drivers/hwmon/ltc4282.c +++ b/drivers/hwmon/ltc4282.c @@ -1511,13 +1511,6 @@ static int ltc4282_setup(struct ltc4282_state *st, struct device *dev) return ret; } - if (device_property_read_bool(dev, "adi,fault-log-enable")) { - ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL, - LTC4282_FAULT_LOG_EN_MASK); - if (ret) - return ret; - } - if (device_property_read_bool(dev, "adi,fault-log-enable")) { ret = regmap_set_bits(st->map, LTC4282_ADC_CTRL, LTC4282_FAULT_LOG_EN_MASK); if (ret) From 070b315333ee942f2cc69d02a864945c9bc27ef2 Mon Sep 17 00:00:00 2001 From: Chun-Tse Shao Date: Wed, 21 May 2025 15:44:44 -0700 Subject: [PATCH 317/497] perf test: Restrict uniquifying test to machines with 'uncore_imc' The test would fail if target machine does not have 'uncore_imc' devices. Since event uniquifying behavior is similar among different architectures, we are restricting the test to only run on machines with `uncore_imc` devices. Suggested-by: Ian Rogers Signed-off-by: Chun-Tse Shao Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250521224513.1104129-1-ctshao@google.com [ Skip the test, i.e. return 2, instead of returning 0 as if the test had succeed ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+event_uniquifying.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/shell/stat+event_uniquifying.sh b/tools/perf/tests/shell/stat+event_uniquifying.sh index 5ec35c52b7d9..bf54bd6c3e2e 100755 --- a/tools/perf/tests/shell/stat+event_uniquifying.sh +++ b/tools/perf/tests/shell/stat+event_uniquifying.sh @@ -9,7 +9,8 @@ perf_tool=perf err=0 test_event_uniquifying() { - # We use `clockticks` to verify the uniquify behavior. + # We use `clockticks` in `uncore_imc` to verify the uniquify behavior. + pmu="uncore_imc" event="clockticks" # If the `-A` option is added, the event should be uniquified. @@ -43,11 +44,18 @@ test_event_uniquifying() { echo "stat event uniquifying test" uniquified_event_array=() + # Skip if the machine does not have `uncore_imc` device. + if ! ${perf_tool} list pmu | grep -q ${pmu}; then + echo "Target does not support PMU ${pmu} [Skipped]" + err=2 + return + fi + # Check how many uniquified events. while IFS= read -r line; do uniquified_event=$(echo "$line" | awk '{print $1}') uniquified_event_array+=("${uniquified_event}") - done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]") + done < <(${perf_tool} list -v ${event} | grep ${pmu}) perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true" $perf_command From 11cfaf37d6a79447bde8192e2e3bb2877932b82b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Jun 2025 12:32:06 -0300 Subject: [PATCH 318/497] tools headers: Update the fs headers with the kernel sources To pick up changes from: 5d894321c49e6137 ("fs: add atomic write unit max opt to statx") a516403787e08119 ("fs/proc: extend the PAGEMAP_SCAN ioctl to report guard regions") c07d3aede2b26830 ("fscrypt: add support for hardware-wrapped keys") These are used to beautify fs syscall arguments, albeit the changes in this update are not affecting those beautifiers. This addresses these tools/ build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/fscrypt.h include/uapi/linux/fscrypt.h diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h diff -u tools/perf/trace/beauty/include/uapi/linux/stat.h include/uapi/linux/stat.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Adrian Hunter Cc: Andrei Vagin Cc: Andrew Morton Cc: Andrii Nakryiko Cc: Darrick J. Wong Cc: Eric Biggers Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Liam R. Howlett Cc: Namhyung Kim Link: https://lore.kernel.org/r/aEce1keWdO-vGeqe@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fscrypt.h | 6 ++++-- tools/include/uapi/linux/stat.h | 8 ++++++-- tools/perf/trace/beauty/include/uapi/linux/fs.h | 1 + tools/perf/trace/beauty/include/uapi/linux/stat.h | 8 ++++++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index e762e1af650c..0098b0ce8ccb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; From bbeb1088a40b0da8b832d72e05e1c1cb71535712 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 9 Jun 2025 17:57:42 -0700 Subject: [PATCH 319/497] perf mem: Document new output fields (op, cache, mem, dtlb, snoop) Update the documentation of the new fields with examples and caveats. Also update the related documentation for AMD IBS. Reviewed-by: Ravi Bangoria Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250610005742.2173050-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-amd-ibs.txt | 57 ++++++++++++++++------- tools/perf/Documentation/perf-mem.txt | 50 ++++++++++++++++++++ 2 files changed, 91 insertions(+), 16 deletions(-) diff --git a/tools/perf/Documentation/perf-amd-ibs.txt b/tools/perf/Documentation/perf-amd-ibs.txt index 55f80beae037..548549935760 100644 --- a/tools/perf/Documentation/perf-amd-ibs.txt +++ b/tools/perf/Documentation/perf-amd-ibs.txt @@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool. # perf mem report A normal perf mem report output will provide detailed memory access profile. -However, it can also be aggregated based on output fields. For example: +New output fields will show related access info together. For example: - # perf mem report -F mem,sample,snoop - Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876 - Memory access Samples Snoop - N/A 1903343 N/A - L1 hit 1056754 N/A - L2 hit 75231 N/A - L3 hit 9496 HitM - L3 hit 2270 N/A - RAM hit 8710 N/A - Remote node, same socket RAM hit 3241 N/A - Remote core, same node Any cache hit 1572 HitM - Remote core, same node Any cache hit 514 N/A - Remote node, same socket Any cache hit 1216 HitM - Remote node, same socket Any cache hit 350 N/A - Uncached hit 18 N/A + # perf mem report -F overhead,cache,snoop,comm + ... + # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # + # ---------- Cache ----------- --- Snoop ---- + # Overhead L1 L2 L1-buf Other HitM Other Command + # ........ ............................ .............. .......... + # + 76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1 + 5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make + 5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc + 5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as + 5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh + 1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld + 0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config + 0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git + 0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm + ... + +Also, it can be aggregated based on various memory access info using the +sort keys. For example: + + # perf mem report -s mem,snoop + ... + # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # Sort order : mem,snoop + # + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A Please refer to their man page for more detail. diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 965e73d37772..4d164836d094 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -119,6 +119,22 @@ REPORT OPTIONS And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat. +-F:: +--fields=:: + Specify output field - multiple keys can be specified in CSV format. + Please see linkperf:perf-report[1] for details. + + In addition to the default fields, 'perf mem report' will provide the + following fields to break down sample periods. + + - op: operation in the sample instruction (load, store, prefetch, ...) + - cache: location in CPU cache (L1, L2, ...) where the sample hit + - mem: location in memory or other places the sample hit + - dtlb: location in Data TLB (L1, L2) where the sample hit + - snoop: snoop result for the sampled data access + + Please take a look at the OUTPUT FIELD SELECTION section for caveats. + -T:: --type-profile:: Show data-type profile result instead of code symbols. This requires @@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20: 90% [k] memcpy 10% [.] strcmp +OUTPUT FIELD SELECTION +---------------------- +"perf mem report" adds a number of new output fields specific to data source +information in the sample. Some of them have the same name with the existing +sort keys ("mem" and "snoop"). So unlike other fields and sort keys, they'll +behave differently when it's used by -F/--fields or -s/--sort. + +Using those two as output fields will aggregate samples altogether and show +breakdown. + + $ perf mem report -F mem,snoop + ... + # ------ Memory ------- --- Snoop ---- + # RAM Uncach Other HitM Other + # ..................... .............. + # + 3.5% 0.0% 96.5% 25.1% 74.9% + +But using the same name for sort keys will aggregate samples for each type +separately. + + $ perf mem report -s mem,snoop + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1] From 3ce66a48a685f27a931e5bdffb637751c58df7d9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 9 Jun 2025 15:02:17 -0300 Subject: [PATCH 320/497] tools headers UAPI: Sync linux/prctl.h with the kernel sources to pick FUTEX knob To pick the changes in: 63e8595c060a1fef ("futex: Allow to make the private hash immutable") 80367ad01d93ac78 ("futex: Add basic infrastructure for local task local hash") That adds a FUTEX knob: $ tools/perf/trace/beauty/prctl_option.sh > before $ cp include/uapi/linux/prctl.h tools/perf/trace/beauty/include/uapi/linux/prctl.h $ tools/perf/trace/beauty/prctl_option.sh > after $ diff -u before after --- before 2025-06-09 14:50:45.162579336 -0300 +++ after 2025-06-09 14:50:52.797660024 -0300 @@ -72,6 +72,7 @@ [75] = "SET_SHADOW_STACK_STATUS", [76] = "LOCK_SHADOW_STACK_STATUS", [77] = "TIMER_CREATE_RESTORE_IDS", + [78] = "FUTEX_HASH", }; static const char *prctl_set_mm_options[] = { [1] = "START_CODE", $ That now will be used to decode the syscall option and also to compose filters, for instance: [root@five ~]# perf trace -e syscalls:sys_enter_prctl --filter option==SET_NAME 0.000 Isolated Servi/3474327 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23f13b7aee) 0.032 DOM Worker/3474327 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23deb25670) 7.920 :3474328/3474328 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24fbb10) 7.935 StreamT~s #374/3474328 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24fb970) 8.400 Isolated Servi/3474329 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24bab10) 8.418 StreamT~s #374/3474329 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24ba970) ^C[root@five ~]# This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Acked-by: Sebastian Andrzej Siewior Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/aEiYOtKkrVDT03hZ@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 15c18ef4eb11..43dec6eed559 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -364,4 +364,11 @@ struct prctl_mm_map { # define PR_TIMER_CREATE_RESTORE_IDS_ON 1 # define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ From f1e30a4269f9636ffe6b0556f336633fac524ba7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Jun 2025 17:46:03 -0300 Subject: [PATCH 321/497] tools kvm headers arm64: Update KVM header from the kernel sources To pick the changes from: b7628c7973765c85 ("KVM: arm64: Allow userspace to limit the number of PMU counters for EL2 VMs") That doesn't result in any changes in tooling (built on a Raspberry PI 5 running Debian GNU/Linux 12 (bookworm)), only addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Marc Zyngier Cc: Namhyung Kim Link: https://lore.kernel.org/r/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/kvm.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index af9d9acaf997..ed5f3892674c 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -431,10 +431,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 From c30c18709587bba7753f8ee2419bacc9100d6b40 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 10:44:07 -0300 Subject: [PATCH 322/497] tools headers UAPI: Sync KVM's vmx.h header with the kernel sources To pick the changes in: 6c441e4d6e729616 ("KVM: TDX: Handle EXIT_REASON_OTHER_SMI") c42856af8f70d983 ("KVM: TDX: Add a place holder for handler of TDX hypercalls (TDG.VP.VMCALL)") That makes 'perf kvm-stat' aware of this new TDCALL exit reason, thus addressing the following perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Isaku Yamahata Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Paolo Bonzini Link: https://lore.kernel.org/r/aErcVn_4plQyODR1@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1b..f0f4a4cf84a7 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -34,6 +34,7 @@ #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 #define EXIT_REASON_SIPI_SIGNAL 4 +#define EXIT_REASON_OTHER_SMI 6 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -92,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_TDCALL 77 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -155,7 +157,8 @@ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ - { EXIT_REASON_NOTIFY, "NOTIFY" } + { EXIT_REASON_NOTIFY, "NOTIFY" }, \ + { EXIT_REASON_TDCALL, "TDCALL" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } From da845e4bf4587cf4a212b607b6d508b00eab5217 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 10:49:47 -0300 Subject: [PATCH 323/497] tools headers x86 svm: Sync svm headers with the kernel sources To pick the changes in: 827547bc3a2a2af6 ("KVM: SVM: Add architectural definitions/assets for Bus Lock Threshold") That triggers: CC /tmp/build/perf-tools/arch/x86/util/kvm-stat.o LD /tmp/build/perf-tools/arch/x86/util/perf-util-in.o LD /tmp/build/perf-tools/arch/x86/perf-util-in.o LD /tmp/build/perf-tools/arch/perf-util-in.o LD /tmp/build/perf-tools/perf-util-in.o AR /tmp/build/perf-tools/libperf-util.a LINK /tmp/build/perf-tools/perf The SVM_EXIT_BUS_LOCK exit reason was added to SVM_EXIT_REASONS, used in kvm-stat.c. This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Nikunj A Dadhania Cc: Sean Christopherson Link: https://lore.kernel.org/r/aErcjuTTCVEZ-8Nb@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/svm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index ec1321248dac..9c640a521a67 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUS_LOCK 0x0a5 #define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -225,6 +226,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_BUS_LOCK, "buslock" }, \ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ From 8fc50bec411e9cc7bba441dc6f7de9af30ea9cbb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 11:31:41 -0300 Subject: [PATCH 324/497] tools headers UAPI: Sync kvm header with the kernel sources To pick the changes in: c9c1e20b4c7d60fa ("KVM: x86: Introduce Intel specific quirk KVM_X86_QUIRK_IGNORE_GUEST_PAT") 012426d6f59cab21 ("KVM: TDX: Finalize VM initialization") c846b451d3c5d4ba ("KVM: TDX: Add an ioctl to create initial guest memory") 488808e682e72bdb ("KVM: x86: Introduce KVM_TDX_GET_CPUID") a50f673f25e0ba2b ("KVM: TDX: Do TDX specific vcpu initialization") 0186dd29a251866d ("KVM: TDX: add ioctl to initialize VM with TDX specific parameters") 61bb28279623b636 ("KVM: TDX: Get system-wide info about TDX module on initialization") b2aaf38ced6905b8 ("KVM: TDX: Add place holder for TDX VM specific mem_enc_op ioctl") This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Cc: Adrian Hunter Cc: Ian Rogers Cc: Isaku Yamahata Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Xiaoyao Li Cc: Yan Zhao Link: https://lore.kernel.org/r/aErqLPktXIzGyS-m@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 71 +++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index b663d916f162..6f3499507c5e 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -441,6 +441,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) +#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 @@ -931,4 +932,74 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SNP_VM 4 #define KVM_X86_TDX_VM 5 +/* Trust Domain eXtension sub-ioctl() commands. */ +enum kvm_tdx_cmd_id { + KVM_TDX_CAPABILITIES = 0, + KVM_TDX_INIT_VM, + KVM_TDX_INIT_VCPU, + KVM_TDX_INIT_MEM_REGION, + KVM_TDX_FINALIZE_VM, + KVM_TDX_GET_CPUID, + + KVM_TDX_CMD_NR_MAX, +}; + +struct kvm_tdx_cmd { + /* enum kvm_tdx_cmd_id */ + __u32 id; + /* flags for sub-commend. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate or a pointer to the actual + * data in process virtual address. If sub-command doesn't use it, + * set zero. + */ + __u64 data; + /* + * Auxiliary error code. The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + */ + __u64 hw_error; +}; + +struct kvm_tdx_capabilities { + __u64 supported_attrs; + __u64 supported_xfam; + __u64 reserved[254]; + + /* Configurable CPUID bits for userspace */ + struct kvm_cpuid2 cpuid; +}; + +struct kvm_tdx_init_vm { + __u64 attributes; + __u64 xfam; + __u64 mrconfigid[6]; /* sha384 digest */ + __u64 mrowner[6]; /* sha384 digest */ + __u64 mrownerconfig[6]; /* sha384 digest */ + + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */ + __u64 reserved[12]; + + /* + * Call KVM_TDX_INIT_VM before vcpu creation, thus before + * KVM_SET_CPUID2. + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the + * TDX module directly virtualizes those CPUIDs without VMM. The user + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with + * those values. If it doesn't, KVM may have wrong idea of vCPUIDs of + * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX + * module doesn't virtualize. + */ + struct kvm_cpuid2 cpuid; +}; + +#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0) + +struct kvm_tdx_init_mem_region { + __u64 source_addr; + __u64 gpa; + __u64 nr_pages; +}; + #endif /* _ASM_X86_KVM_H */ From 47684bfb7c3405aa8f7bfb31f2aa642ffd2b640f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 11:55:54 -0300 Subject: [PATCH 325/497] perf beauty: Update copy of linux/socket.h with the kernel sources To pick the changes in: b1e904999542ad67 ("net: pass const to msg_data_left()") That don't result in any changes in the tables generated from that header. This silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Please see tools/include/uapi/README for details. Cc: Adrian Hunter Cc: Breno Leitao Cc: Ian Rogers Cc: Jakub Kicinski Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/aErrK24XLUILFH_P@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/linux/socket.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index c3322eb3d686..3b262487ec06 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -static inline size_t msg_data_left(struct msghdr *msg) +static inline size_t msg_data_left(const struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } From c71bc5995409395d5c9094144534912886fd09a7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 12:03:28 -0300 Subject: [PATCH 326/497] tools headers UAPI: Sync the drm/drm.h with the kernel sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Picking the changes from: c2d3a730069545f2 ("drm/syncobj: Extend EXPORT_SYNC_FILE for timeline syncobjs") Silencing these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h No changes in tooling as these are just C comment documentation changes: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after $ Cc: Adrian Hunter Cc: Christian König Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Rob Clark Link: https://lore.kernel.org/r/aErtHs3T2hdPjjHx@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 7fba37b94401..e63a71d3c607 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -905,13 +905,17 @@ struct drm_syncobj_destroy { }; #define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) #define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) struct drm_syncobj_handle { __u32 handle; __u32 flags; __s32 fd; __u32 pad; + + __u64 point; }; struct drm_syncobj_transfer { From aa69783a5920d5924d95a198917fa3dd9c2c84a3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 12:08:45 -0300 Subject: [PATCH 327/497] tools headers UAPI: Sync linux/kvm.h with the kernel sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes in: 5b9db9c16f428ada ("RISC-V: KVM: add KVM_CAP_RISCV_MP_STATE_RESET") a7484c80e5ca1ae0 ("KVM: arm64: Allow userspace to request KVM_ARM_VCPU_EL2*") 79462faa2b2aa89d ("KVM: TDX: Handle TDG.VP.VMCALL") That just rebuilds perf, as these patches don't add any new KVM ioctl to be harvested for the the 'perf trace' ioctl syscall argument beautifiers. This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Adrian Hunter Cc: Anup Patel Cc: Binbin Wu Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Marc Zyngier Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Radim Krčmář Link: https://lore.kernel.org/r/aEruUUJvR0bfCg7_@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b6ae8ad8934b..d00b85cb168c 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -375,6 +375,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -930,6 +931,9 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 struct kvm_irq_routing_irqchip { __u32 irqchip; From 00c8fde72fe2cdbd3892fbdc338c2d3a66ec8db3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 12:29:23 -0300 Subject: [PATCH 328/497] tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench' Also add SYM_PIC_ALIAS() to tools/perf/util/include/linux/linkage.h. This is to get the changes from: 419cbaf6a56a6e4b ("x86/boot: Add a bunch of PIC aliases") That addresses these perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Adrian Hunter Cc: Ard Biesheuvel Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/aEry7L3fibwIG5au@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 1 + tools/arch/x86/lib/memset_64.S | 1 + tools/perf/util/include/linux/linkage.h | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf6f9065aa..ccc3d923fc1e 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -40,6 +40,7 @@ SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) +SYM_PIC_ALIAS(memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index d66b710d628f..fb5a03cf5ab7 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -42,6 +42,7 @@ SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) +SYM_PIC_ALIAS(memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 178b00205fe6..89979ca23c3f 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -132,4 +132,8 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#ifndef SYM_PIC_ALIAS +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ From 3417404c6f83c5d72d69ae81ded914bdc3841a49 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 12:53:37 -0300 Subject: [PATCH 329/497] tools headers: Syncronize linux/build_bug.h with the kernel sources To pick up the changes in: 243c90e917f5cfc9 ("build_bug.h: more user friendly error messages in BUILD_BUG_ON_ZERO()") This also needed to pick the __BUILD_BUG_ON_ZERO_MSG() in linux/compiler.h, that needed to be polished to avoid hitting old clang problems with _Static_assert on arrays of structs: Debian clang version 11.0.1-2~deb10u1 Debian clang version 11.0.1-2~deb10u1 $ make NO_LIBTRACEEVENT=1 ARCH= CROSS_COMPILE= EXTRA_CFLAGS= -C tools/perf O=/tmp/build/perf CC=clang btf_dump.c:895:18: error: type name does not allow storage class to be specified for (i = 0; i < ARRAY_SIZE(pads); i++) { ^ /git/perf-6.16.0-rc1/tools/include/linux/kernel.h:91:59: note: expanded from macro 'ARRAY_SIZE' #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) ^ /git/perf-6.16.0-rc1/tools/include/linux/compiler-gcc.h:26:28: note: expanded from macro '__must_be_array' #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) ^ /git/perf-6.16.0-rc1/tools/include/linux/build_bug.h:17:2: note: expanded from macro 'BUILD_BUG_ON_ZERO' __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") ^ /git/perf-6.16.0-rc1/tools/include/linux/compiler.h:248:67: note: expanded from macro '__BUILD_BUG_ON_ZERO_MSG' #define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) ^ /usr/include/x86_64-linux-gnu/sys/cdefs.h:438:5: note: expanded from macro '_Static_assert' extern int (*__Static_assert_function (void)) \ ^ These also failed: toolsbuilder@five:~$ grep FAIL dm.log/summary | grep clang 1 72.87 almalinux:8 : FAIL clang version 19.1.7 ( 19.1.7-2.module_el8.10.0+3990+33d0d926) 15 73.39 centos:stream : FAIL clang version 17.0.6 (Red Hat 17.0.6-1.module_el8+767+9fa966b8) 36 87.14 opensuse:15.4 : FAIL clang version 15.0.7 37 80.08 opensuse:15.5 : FAIL clang version 15.0.7 40 72.12 oraclelinux:8 : FAIL clang version 16.0.6 (Red Hat 16.0.6-2.0.1.module+el8.9.0+90129+d3ee8717) 42 74.12 rockylinux:8 : FAIL clang version 16.0.6 (Red Hat 16.0.6-2.module+el8.9.0+1651+e10a8f6d) toolsbuilder@five:~$ This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/linux/build_bug.h include/linux/build_bug.h Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Vincent Mailhol Cc: Yury Norov Link: https://lore.kernel.org/r/aEszb7SSIJB6Lp6f@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/build_bug.h | 10 +++++----- tools/include/linux/compiler.h | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index b4898ff085de..ab2aa97bd8ce 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index d627e66a04a6..33411ca0cc90 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -244,6 +244,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#ifndef __BUILD_BUG_ON_ZERO_MSG +#if defined(__clang__) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); }))) +#else +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif +#endif + #endif /* __ASSEMBLY__ */ #endif /* _TOOLS_LINUX_COMPILER_H */ From 6143374c6dc874d301dc16612aed144bf4544bae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 17:58:05 -0300 Subject: [PATCH 330/497] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes from these csets: 159013a7ca18c271 ("x86/its: Enumerate Indirect Target Selection (ITS) bug") f4138de5e41fae1a ("x86/msr: Standardize on u64 in ") ec980e4facef8110 ("perf/x86/intel: Support auto counter reload") That cause no changes to tooling as it doesn't include a new MSR to be captured by the tools/perf/trace/beauty/tracepoints/x86_msr.sh script. Just silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Cc: Adrian Hunter Cc: Dave Hansen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Pawan Gupta Cc: Peter Zijlstra Link: https://lore.kernel.org/r/aEtAUg83OQGx8Kay@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e7d2f460fcc6..b7dded3c8113 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -533,7 +533,7 @@ #define MSR_HWP_CAPABILITIES 0x00000771 #define MSR_HWP_REQUEST_PKG 0x00000772 #define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 /* CPUID.6.EAX */ @@ -550,16 +550,16 @@ #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24) #define HWP_EPP_PERFORMANCE 0x00 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) +#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -602,7 +602,11 @@ /* V6 PMON MSR range */ #define MSR_IA32_PMC_V6_GP0_CTR 0x1900 #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 #define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 #define MSR_IA32_PMC_V6_STEP 4 /* KeyID partitioning between MKTME and TDX */ From d8ab68bdb294b09a761e967dad374f2965e1913f Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Thu, 12 Jun 2025 12:15:56 +0200 Subject: [PATCH 331/497] scsi: target: Fix NULL pointer dereference in core_scsi3_decode_spec_i_port() The function core_scsi3_decode_spec_i_port(), in its error code path, unconditionally calls core_scsi3_lunacl_undepend_item() passing the dest_se_deve pointer, which may be NULL. This can lead to a NULL pointer dereference if dest_se_deve remains unset. SPC-3 PR SPEC_I_PT: Unable to locate dest_tpg Unable to handle kernel paging request at virtual address dfff800000000012 Call trace: core_scsi3_lunacl_undepend_item+0x2c/0xf0 [target_core_mod] (P) core_scsi3_decode_spec_i_port+0x120c/0x1c30 [target_core_mod] core_scsi3_emulate_pro_register+0x6b8/0xcd8 [target_core_mod] target_scsi3_emulate_pr_out+0x56c/0x840 [target_core_mod] Fix this by adding a NULL check before calling core_scsi3_lunacl_undepend_item() Signed-off-by: Maurizio Lombardi Link: https://lore.kernel.org/r/20250612101556.24829-1-mlombard@redhat.com Reviewed-by: Mike Christie Reviewed-by: John Meneghini Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 34cf2c399b39..70905805cb17 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1842,7 +1842,9 @@ core_scsi3_decode_spec_i_port( } kmem_cache_free(t10_pr_reg_cache, dest_pr_reg); - core_scsi3_lunacl_undepend_item(dest_se_deve); + + if (dest_se_deve) + core_scsi3_lunacl_undepend_item(dest_se_deve); if (is_local) continue; From 594902c986e269660302f09df9ec4bf1cf017b77 Mon Sep 17 00:00:00 2001 From: Qinyun Tan Date: Sat, 31 May 2025 02:20:53 +0800 Subject: [PATCH 332/497] x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain structure representing a NUMA node relies on the cacheinfo interface (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map) for monitoring. The L3 cache information of a SNC NUMA node determines which domains are summed for the "top level" L3-scoped events. rdt_mon_domain::ci is initialized using the first online CPU of a NUMA node. When this CPU goes offline, its shared_cpu_map is cleared to contain only the offline CPU itself. Subsequently, attempting to read counters via smp_call_on_cpu(offline_cpu) fails (and error ignored), returning zero values for "top-level events" without any error indication. Replace the cacheinfo references in struct rdt_mon_domain and struct rmid_read with the cacheinfo ID (a unique identifier for the L3 cache). rdt_domain_hdr::cpu_mask contains the online CPUs associated with that domain. When reading "top-level events", select a CPU from rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine valid CPUs for reading RMID counter via the MSR interface. Considering all CPUs associated with the L3 cache improves the chances of picking a housekeeping CPU on which the counter reading work can be queued, avoiding an unnecessary IPI. Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files") Signed-off-by: Qinyun Tan Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Tony Luck Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com --- arch/x86/kernel/cpu/resctrl/core.c | 6 ++++-- fs/resctrl/ctrlmondata.c | 13 +++++++++---- fs/resctrl/internal.h | 4 ++-- fs/resctrl/monitor.c | 6 ++++-- fs/resctrl/rdtgroup.c | 6 +++--- include/linux/resctrl.h | 4 ++-- 6 files changed, 24 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 7109cbfcad4f..187d527ef73b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) struct rdt_hw_mon_domain *hw_dom; struct rdt_domain_hdr *hdr; struct rdt_mon_domain *d; + struct cacheinfo *ci; int err; lockdep_assert_held(&domain_list_lock); @@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) d = &hw_dom->d_resctrl; d->hdr.id = id; d->hdr.type = RESCTRL_MON_DOMAIN; - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); - if (!d->ci) { + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) { pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); mon_domain_free(hw_dom); return; } + d->ci_id = ci->id; cpumask_set_cpu(cpu, &d->hdr.cpu_mask); arch_mon_domain_online(r, d); diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 6ed2dfd4dbbd..d98e0d2de09f 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) struct rmid_read rr = {0}; struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; + int domid, cpu, ret = 0; struct rdt_resource *r; + struct cacheinfo *ci; struct mon_data *md; - int domid, ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * one that matches this cache id. */ list_for_each_entry(d, &r->mon_domains, hdr.list) { - if (d->ci->id == domid) { - rr.ci = d->ci; + if (d->ci_id == domid) { + rr.ci_id = d->ci_id; + cpu = cpumask_any(&d->hdr.cpu_mask); + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) + continue; mon_event_read(&rr, r, NULL, rdtgrp, - &d->ci->shared_cpu_map, evtid, false); + &ci->shared_cpu_map, evtid, false); goto checkresult; } } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9a8cf6f11151..0a1eedba2b03 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -98,7 +98,7 @@ struct mon_data { * domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -112,7 +112,7 @@ struct rmid_read { struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; - struct cacheinfo *ci; + unsigned int ci_id; int err; u64 val; void *arch_mon_ctx; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index bde2801289d3..f5637855c3ac 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); struct rdt_mon_domain *d; + struct cacheinfo *ci; struct mbm_state *m; int err, ret; u64 tval = 0; @@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) } /* Summing domains that share a cache, must be on a CPU for that cache. */ - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci || ci->id != rr->ci_id) return -EINVAL; /* @@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ ret = -EINVAL; list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { - if (d->ci->id != rr->ci->id) + if (d->ci_id != rr->ci_id) continue; err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->evtid, &tval, rr->arch_mon_ctx); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 1beb124e25f6..77d08229d855 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, char name[32]; snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); if (snc_mode) sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); @@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return -EPERM; list_for_each_entry(mevt, &r->evt_list, list) { - domid = do_sum ? d->ci->id : d->hdr.id; + domid = do_sum ? d->ci_id : d->hdr.id; priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); if (WARN_ON_ONCE(!priv)) return -EINVAL; @@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, lockdep_assert_held(&rdtgroup_mutex); snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); if (kn) { /* diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 9ba771f2ddea..6fb4894b8cfd 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -159,7 +159,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types - * @ci: cache info for this domain + * @ci_id: cache info id for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -170,7 +170,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; - struct cacheinfo *ci; + unsigned int ci_id; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; From 9d4204a8106fe7dc80e3f2e440c8f2ba1ba47319 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 15 Jun 2025 18:06:54 -0700 Subject: [PATCH 333/497] lib/crypto/poly1305: Fix arm64's poly1305_blocks_arch() For some reason arm64's Poly1305 code got changed to ignore the padbit argument. As a result, the output is incorrect when the message length is not a multiple of 16 (which is not reached with the standard ChaCha20Poly1305, but bcachefs could reach this). Fix this. Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface") Reported-by: Kent Overstreet Tested-by: Kent Overstreet Link: https://lore.kernel.org/r/20250616010654.367302-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm64/lib/crypto/poly1305-glue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/lib/crypto/poly1305-glue.c b/arch/arm64/lib/crypto/poly1305-glue.c index 6a661cf04821..c9a74766785b 100644 --- a/arch/arm64/lib/crypto/poly1305-glue.c +++ b/arch/arm64/lib/crypto/poly1305-glue.c @@ -38,14 +38,14 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, unsigned int todo = min_t(unsigned int, len, SZ_4K); kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, 1); + poly1305_blocks_neon(state, src, todo, padbit); kernel_neon_end(); len -= todo; src += todo; } while (len); } else - poly1305_blocks(state, src, len, 1); + poly1305_blocks(state, src, len, padbit); } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); From b2e673ae53ef4b943f68585207a5f21cfc9a0714 Mon Sep 17 00:00:00 2001 From: Avadhut Naik Date: Fri, 13 Jun 2025 00:51:35 +0000 Subject: [PATCH 334/497] EDAC/amd64: Correct number of UMCs for family 19h models 70h-7fh AMD's Family 19h-based Models 70h-7fh support 4 unified memory controllers (UMC) per processor die. The amd64_edac driver, however, assumes only 2 UMCs are supported since max_mcs variable for the models has not been explicitly set to 4. The same results in incomplete or incorrect memory information being logged to dmesg by the module during initialization in some instances. Fixes: 6c79e42169fe ("EDAC/amd64: Add support for ECC on family 19h model 60h-7Fh") Closes: https://lore.kernel.org/all/27dc093f-ce27-4c71-9e81-786150a040b6@reox.at/ Reported-by: reox Signed-off-by: Avadhut Naik Signed-off-by: Borislav Petkov (AMD) Cc: stable@kernel.org Link: https://lore.kernel.org/20250613005233.2330627-1-avadhut.naik@amd.com --- drivers/edac/amd64_edac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 58b1482a0fbb..b681c0663203 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3879,6 +3879,7 @@ static int per_family_init(struct amd64_pvt *pvt) break; case 0x70 ... 0x7f: pvt->ctl_name = "F19h_M70h"; + pvt->max_mcs = 4; pvt->flags.zn_regs_v2 = 1; break; case 0x90 ... 0x9f: From 1224b218a4b9203656ecc932152f4c81a97b4fcc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 13 Jun 2025 17:46:20 +0100 Subject: [PATCH 335/497] pldmfw: Select CRC32 when PLDMFW is selected pldmfw calls crc32 code and depends on it being enabled, else there is a link error as follows. So PLDMFW should select CRC32. lib/pldmfw/pldmfw.o: In function `pldmfw_flash_image': pldmfw.c:(.text+0x70f): undefined reference to `crc32_le_base' This problem was introduced by commit b8265621f488 ("Add pldmfw library for PLDM firmware update"). It manifests as of commit d69ea414c9b4 ("ice: implement device flash update via devlink"). And is more likely to occur as of commit 9ad19171b6d6 ("lib/crc: remove unnecessary prompt for CONFIG_CRC32 and drop 'default y'"). Found by chance while exercising builds based on tinyconfig. Fixes: b8265621f488 ("Add pldmfw library for PLDM firmware update") Signed-off-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250613-pldmfw-crc32-v1-1-f3fad109eee6@kernel.org Signed-off-by: Jakub Kicinski --- lib/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig b/lib/Kconfig index 6c1b8f184267..37db228f70a9 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -716,6 +716,7 @@ config GENERIC_LIB_DEVMEM_IS_ALLOWED config PLDMFW bool + select CRC32 default n config ASN1_ENCODER From 2a8a5a5dd06eef580f9818567773fd75057cb875 Mon Sep 17 00:00:00 2001 From: Vitaliy Shevtsov Date: Thu, 12 Jun 2025 21:35:18 +0500 Subject: [PATCH 336/497] scsi: elx: efct: Fix memory leak in efct_hw_parse_filter() strsep() modifies the address of the pointer passed to it so that it no longer points to the original address. This means kfree() gets the wrong pointer. Fix this by passing unmodified pointer returned from kstrdup() to kfree(). Found by Linux Verification Center (linuxtesting.org) with Svace. Fixes: 4df84e846624 ("scsi: elx: efct: Driver initialization routines") Signed-off-by: Vitaliy Shevtsov Link: https://lore.kernel.org/r/20250612163616.24298-1-v.shevtsov@mt-integration.ru Reviewed-by: Daniel Wagner Signed-off-by: Martin K. Petersen --- drivers/scsi/elx/efct/efct_hw.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/elx/efct/efct_hw.c b/drivers/scsi/elx/efct/efct_hw.c index 5a5525054d71..5b079b8b7a08 100644 --- a/drivers/scsi/elx/efct/efct_hw.c +++ b/drivers/scsi/elx/efct/efct_hw.c @@ -1120,7 +1120,7 @@ int efct_hw_parse_filter(struct efct_hw *hw, void *value) { int rc = 0; - char *p = NULL; + char *p = NULL, *pp = NULL; char *token; u32 idx = 0; @@ -1132,6 +1132,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value) efc_log_err(hw->os, "p is NULL\n"); return -ENOMEM; } + pp = p; idx = 0; while ((token = strsep(&p, ",")) && *token) { @@ -1144,7 +1145,7 @@ efct_hw_parse_filter(struct efct_hw *hw, void *value) if (idx == ARRAY_SIZE(hw->config.filter_def)) break; } - kfree(p); + kfree(pp); return rc; } From e1bc3a13bd775791cca0bb144d977b00f3598042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 2 Jun 2025 12:14:02 -0300 Subject: [PATCH 337/497] drm/v3d: Avoid NULL pointer dereference in `v3d_job_update_stats()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following kernel Oops was recently reported by Mesa CI: [ 800.139824] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000588 [ 800.148619] Mem abort info: [ 800.151402] ESR = 0x0000000096000005 [ 800.155141] EC = 0x25: DABT (current EL), IL = 32 bits [ 800.160444] SET = 0, FnV = 0 [ 800.163488] EA = 0, S1PTW = 0 [ 800.166619] FSC = 0x05: level 1 translation fault [ 800.171487] Data abort info: [ 800.174357] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 800.179832] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 800.184873] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 800.190176] user pgtable: 4k pages, 39-bit VAs, pgdp=00000001014c2000 [ 800.196607] [0000000000000588] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 800.205305] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 800.211564] Modules linked in: vc4 snd_soc_hdmi_codec drm_display_helper v3d cec gpu_sched drm_dma_helper drm_shmem_helper drm_kms_helper drm drm_panel_orientation_quirks snd_soc_core snd_compress snd_pcm_dmaengine snd_pcm i2c_brcmstb snd_timer snd backlight [ 800.234448] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.25+rpt-rpi-v8 #1 Debian 1:6.12.25-1+rpt1 [ 800.244182] Hardware name: Raspberry Pi 4 Model B Rev 1.4 (DT) [ 800.250005] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 800.256959] pc : v3d_job_update_stats+0x60/0x130 [v3d] [ 800.262112] lr : v3d_job_update_stats+0x48/0x130 [v3d] [ 800.267251] sp : ffffffc080003e60 [ 800.270555] x29: ffffffc080003e60 x28: ffffffd842784980 x27: 0224012000000000 [ 800.277687] x26: ffffffd84277f630 x25: ffffff81012fd800 x24: 0000000000000020 [ 800.284818] x23: ffffff8040238b08 x22: 0000000000000570 x21: 0000000000000158 [ 800.291948] x20: 0000000000000000 x19: ffffff8040238000 x18: 0000000000000000 [ 800.299078] x17: ffffffa8c1bd2000 x16: ffffffc080000000 x15: 0000000000000000 [ 800.306208] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ 800.313338] x11: 0000000000000040 x10: 0000000000001a40 x9 : ffffffd83b39757c [ 800.320468] x8 : ffffffd842786420 x7 : 7fffffffffffffff x6 : 0000000000ef32b0 [ 800.327598] x5 : 00ffffffffffffff x4 : 0000000000000015 x3 : ffffffd842784980 [ 800.334728] x2 : 0000000000000004 x1 : 0000000000010002 x0 : 000000ba4c0ca382 [ 800.341859] Call trace: [ 800.344294] v3d_job_update_stats+0x60/0x130 [v3d] [ 800.349086] v3d_irq+0x124/0x2e0 [v3d] [ 800.352835] __handle_irq_event_percpu+0x58/0x218 [ 800.357539] handle_irq_event+0x54/0xb8 [ 800.361369] handle_fasteoi_irq+0xac/0x240 [ 800.365458] handle_irq_desc+0x48/0x68 [ 800.369200] generic_handle_domain_irq+0x24/0x38 [ 800.373810] gic_handle_irq+0x48/0xd8 [ 800.377464] call_on_irq_stack+0x24/0x58 [ 800.381379] do_interrupt_handler+0x88/0x98 [ 800.385554] el1_interrupt+0x34/0x68 [ 800.389123] el1h_64_irq_handler+0x18/0x28 [ 800.393211] el1h_64_irq+0x64/0x68 [ 800.396603] default_idle_call+0x3c/0x168 [ 800.400606] do_idle+0x1fc/0x230 [ 800.403827] cpu_startup_entry+0x40/0x50 [ 800.407742] rest_init+0xe4/0xf0 [ 800.410962] start_kernel+0x5e8/0x790 [ 800.414616] __primary_switched+0x80/0x90 [ 800.418622] Code: 8b170277 8b160296 11000421 b9000861 (b9401ac1) [ 800.424707] ---[ end trace 0000000000000000 ]--- [ 800.457313] ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- This issue happens when the file descriptor is closed before the jobs submitted by it are completed. When the job completes, we update the global GPU stats and the per-fd GPU stats, which are exposed through fdinfo. If the file descriptor was closed, then the struct `v3d_file_priv` and its stats were already freed and we can't update the per-fd stats. Therefore, if the file descriptor was already closed, don't update the per-fd GPU stats, only update the global ones. Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Jose Maria Casanova Crespo Link: https://lore.kernel.org/r/20250602151451.10161-1-mcanal@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_sched.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 35f131a46d07..42df9d3567e7 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -199,7 +199,6 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) struct v3d_dev *v3d = job->v3d; struct v3d_file_priv *file = job->file->driver_priv; struct v3d_stats *global_stats = &v3d->queue[queue].stats; - struct v3d_stats *local_stats = &file->stats[queue]; u64 now = local_clock(); unsigned long flags; @@ -209,7 +208,12 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue) else preempt_disable(); - v3d_stats_update(local_stats, now); + /* Don't update the local stats if the file context has already closed */ + if (file) + v3d_stats_update(&file->stats[queue], now); + else + drm_dbg(&v3d->drm, "The file descriptor was closed before job completion\n"); + v3d_stats_update(global_stats, now); if (IS_ENABLED(CONFIG_LOCKDEP)) From 86c8db86af43f52f682e53a0f2f0828683be1e52 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 13 Jun 2025 15:37:05 -0400 Subject: [PATCH 338/497] selinux: fix selinux_xfrm_alloc_user() to set correct ctx_len MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should count the terminating NUL byte as part of the ctx_len. Otherwise, UBSAN logs a warning: UBSAN: array-index-out-of-bounds in security/selinux/xfrm.c:99:14 index 60 is out of range for type 'char [*]' The allocation itself is correct so there is no actual out of bounds indexing, just a warning. Cc: stable@vger.kernel.org Suggested-by: Christian Göttsche Link: https://lore.kernel.org/selinux/CAEjxPJ6tA5+LxsGfOJokzdPeRomBHjKLBVR6zbrg+_w3ZZbM3A@mail.gmail.com/ Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/xfrm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 90ec4ef1b082..61d56b0c2be1 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -94,7 +94,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, ctx->ctx_doi = XFRM_SC_DOI_LSM; ctx->ctx_alg = XFRM_SC_ALG_SELINUX; - ctx->ctx_len = str_len; + ctx->ctx_len = str_len + 1; memcpy(ctx->ctx_str, &uctx[1], str_len); ctx->ctx_str[str_len] = '\0'; rc = security_context_to_sid(ctx->ctx_str, str_len, From 61ee19dedb8d753249e20308782bf4e9e2fb7344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 2 Jun 2025 10:22:16 -0300 Subject: [PATCH 339/497] drm/etnaviv: Protect the scheduler's pending list with its lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still active") ensured that active jobs are returned to the pending list when extending the timeout. However, it didn't use the pending list's lock to manipulate the list, which causes a race condition as the scheduler's workqueues are running. Hold the lock while manipulating the scheduler's pending list to prevent a race. Cc: stable@vger.kernel.org Fixes: 704d3d60fec4 ("drm/etnaviv: don't block scheduler when GPU is still active") Reported-by: Philipp Stanner Closes: https://lore.kernel.org/dri-devel/964e59ba1539083ef29b06d3c78f5e2e9b138ab8.camel@mailbox.org/ Reviewed-by: Lucas Stach Reviewed-by: Philipp Stanner Link: https://lore.kernel.org/r/20250602132240.93314-1-mcanal@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 76a3a3e517d8..71e2e6b9d713 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -35,6 +35,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) { struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job); + struct drm_gpu_scheduler *sched = sched_job->sched; struct etnaviv_gpu *gpu = submit->gpu; u32 dma_addr, primid = 0; int change; @@ -89,7 +90,9 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job return DRM_GPU_SCHED_STAT_NOMINAL; out_no_timeout: - list_add(&sched_job->list, &sched_job->sched->pending_list); + spin_lock(&sched->job_list_lock); + list_add(&sched_job->list, &sched->pending_list); + spin_unlock(&sched->job_list_lock); return DRM_GPU_SCHED_STAT_NOMINAL; } From 6aba0cb5bba6141158d5449f2cf53187b7f755f9 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 5 Jun 2025 11:44:46 +0530 Subject: [PATCH 340/497] RISC-V: KVM: Fix the size parameter check in SBI SFENCE calls As-per the SBI specification, an SBI remote fence operation applies to the entire address space if either: 1) start_addr and size are both 0 2) size is equal to 2^XLEN-1 >From the above, only #1 is checked by SBI SFENCE calls so fix the size parameter check in SBI SFENCE calls to cover #2 as well. Fixes: 13acfec2dbcc ("RISC-V: KVM: Add remote HFENCE functions based on VCPU requests") Reviewed-by: Atish Patra Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250605061458.196003-2-apatel@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_replace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 5fbf3f94f1e8..9752d2ffff68 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -103,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask); else kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, @@ -111,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, hbase, hmask, cp->a4); else From 2e7be162996640bbe3b6da694cc064c511b8a5d9 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 5 Jun 2025 11:44:47 +0530 Subject: [PATCH 341/497] RISC-V: KVM: Don't treat SBI HFENCE calls as NOPs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SBI specification clearly states that SBI HFENCE calls should return SBI_ERR_NOT_SUPPORTED when one of the target hart doesn’t support hypervisor extension (aka nested virtualization in-case of KVM RISC-V). Fixes: c7fa3c48de86 ("RISC-V: KVM: Treat SBI HFENCE calls as NOPs") Reviewed-by: Atish Patra Signed-off-by: Anup Patel Link: https://lore.kernel.org/r/20250605061458.196003-3-apatel@ventanamicro.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu_sbi_replace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 9752d2ffff68..b17fad091bab 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -127,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: /* * Until nested virtualization is implemented, the - * SBI HFENCE calls should be treated as NOPs + * SBI HFENCE calls should return not supported + * hence fallthrough. */ - break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } From 0a1db19f66c0960eb00e1f2ccd40708b6747f5b1 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Mon, 16 Jun 2025 15:45:03 +0200 Subject: [PATCH 342/497] gpio: pca953x: fix wrong error probe return value The second argument to dev_err_probe() is the error value. Pass the return value of devm_request_threaded_irq() there instead of the irq number. Signed-off-by: Sascha Hauer Fixes: c47f7ff0fe61 ("gpio: pca953x: Utilise dev_err_probe() where it makes sense") Link: https://lore.kernel.org/r/20250616134503.1201138-1-s.hauer@pengutronix.de Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index b852e4997629..e80a96f39788 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -974,7 +974,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, int irq_base) IRQF_ONESHOT | IRQF_SHARED, dev_name(dev), chip); if (ret) - return dev_err_probe(dev, client->irq, "failed to request irq\n"); + return dev_err_probe(dev, ret, "failed to request irq\n"); return 0; } From a7b3b77fd111d49f8e25624e4ea1046322a57baf Mon Sep 17 00:00:00 2001 From: Mikko Korhonen Date: Tue, 17 Jun 2025 09:18:41 +0300 Subject: [PATCH 343/497] ata: ahci: Disallow LPM for Asus B550-F motherboard Asus ROG STRIX B550-F GAMING (WI-FI) motherboard has problems on some SATA ports with at least one hard drive model (WDC WD20EFAX-68FB5N0) when LPM is enabled. Disabling LPM solves the issue. Cc: stable@vger.kernel.org Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type") Signed-off-by: Mikko Korhonen Link: https://lore.kernel.org/r/20250617062055.784827-1-mjkorhon@gmail.com [cassel: more detailed comment, make single line comments consistent] Signed-off-by: Niklas Cassel --- drivers/ata/ahci.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index c8ad8ace7496..e5e5c2e81d09 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1452,7 +1452,23 @@ static bool ahci_broken_lpm(struct pci_dev *pdev) DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_VERSION, "ASUSPRO D840MB_M840SA"), }, - /* 320 is broken, there is no known good version yet. */ + /* 320 is broken, there is no known good version. */ + }, + { + /* + * AMD 500 Series Chipset SATA Controller [1022:43eb] + * on this motherboard timeouts on ports 5 and 6 when + * LPM is enabled, at least with WDC WD20EFAX-68FB5N0 + * hard drives. LPM with the same drive works fine on + * all other ports on the same controller. + */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, + "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_BOARD_NAME, + "ROG STRIX B550-F GAMING (WI-FI)"), + }, + /* 3621 is broken, there is no known good version. */ }, { } /* terminate list */ }; From 7f90d45e57cb2ef1f0adcaf925ddffdfc5e680ca Mon Sep 17 00:00:00 2001 From: Justin Sanders Date: Tue, 10 Jun 2025 17:05:59 +0000 Subject: [PATCH 344/497] aoe: clean device rq_list in aoedev_downdev() An aoe device's rq_list contains accepted block requests that are waiting to be transmitted to the aoe target. This queue was added as part of the conversion to blk_mq. However, the queue was not cleaned out when an aoe device is downed which caused blk_mq_freeze_queue() to sleep indefinitely waiting for those requests to complete, causing a hang. This fix cleans out the queue before calling blk_mq_freeze_queue(). Link: https://bugzilla.kernel.org/show_bug.cgi?id=212665 Fixes: 3582dd291788 ("aoe: convert aoeblk to blk-mq") Signed-off-by: Justin Sanders Link: https://lore.kernel.org/r/20250610170600.869-1-jsanders.devel@gmail.com Tested-By: Valentin Kleibel Signed-off-by: Jens Axboe --- drivers/block/aoe/aoedev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 141b2a0e03f2..8c18034cb3d6 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -198,6 +198,7 @@ aoedev_downdev(struct aoedev *d) { struct aoetgt *t, **tt, **te; struct list_head *head, *pos, *nx; + struct request *rq, *rqnext; int i; d->flags &= ~DEVFL_UP; @@ -223,6 +224,13 @@ aoedev_downdev(struct aoedev *d) /* clean out the in-process request (if any) */ aoe_failip(d); + /* clean out any queued block requests */ + list_for_each_entry_safe(rq, rqnext, &d->rq_list, queuelist) { + list_del_init(&rq->queuelist); + blk_mq_start_request(rq); + blk_mq_end_request(rq, BLK_STS_IOERR); + } + /* fast fail all pending I/O */ if (d->blkq) { /* UP is cleared, freeze+quiesce to insure all are errored */ From cffc873d68ab09a0432b8212008c5613f8a70a2c Mon Sep 17 00:00:00 2001 From: Justin Sanders Date: Tue, 10 Jun 2025 17:06:00 +0000 Subject: [PATCH 345/497] aoe: defer rexmit timer downdev work to workqueue When aoe's rexmit_timer() notices that an aoe target fails to respond to commands for more than aoe_deadsecs, it calls aoedev_downdev() which cleans the outstanding aoe and block queues. This can involve sleeping, such as in blk_mq_freeze_queue(), which should not occur in irq context. This patch defers that aoedev_downdev() call to the aoe device's workqueue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=212665 Signed-off-by: Justin Sanders Link: https://lore.kernel.org/r/20250610170600.869-2-jsanders.devel@gmail.com Tested-By: Valentin Kleibel Signed-off-by: Jens Axboe --- drivers/block/aoe/aoe.h | 1 + drivers/block/aoe/aoecmd.c | 8 ++++++-- drivers/block/aoe/aoedev.c | 5 ++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 749ae1246f4c..d35caa3c69e1 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -80,6 +80,7 @@ enum { DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */ DEVFL_FREED = (1<<8), /* device has been cleaned up */ + DEVFL_DEAD = (1<<9), /* device has timed out of aoe_deadsecs */ }; enum { diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 92b06d1de4cc..6c94cfd1c480 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -754,7 +754,7 @@ rexmit_timer(struct timer_list *timer) utgts = count_targets(d, NULL); - if (d->flags & DEVFL_TKILL) { + if (d->flags & (DEVFL_TKILL | DEVFL_DEAD)) { spin_unlock_irqrestore(&d->lock, flags); return; } @@ -786,7 +786,8 @@ rexmit_timer(struct timer_list *timer) * to clean up. */ list_splice(&flist, &d->factive[0]); - aoedev_downdev(d); + d->flags |= DEVFL_DEAD; + queue_work(aoe_wq, &d->work); goto out; } @@ -898,6 +899,9 @@ aoecmd_sleepwork(struct work_struct *work) { struct aoedev *d = container_of(work, struct aoedev, work); + if (d->flags & DEVFL_DEAD) + aoedev_downdev(d); + if (d->flags & DEVFL_GDALLOC) aoeblk_gdalloc(d); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 8c18034cb3d6..9b66f8ebce50 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -200,8 +200,11 @@ aoedev_downdev(struct aoedev *d) struct list_head *head, *pos, *nx; struct request *rq, *rqnext; int i; + unsigned long flags; - d->flags &= ~DEVFL_UP; + spin_lock_irqsave(&d->lock, flags); + d->flags &= ~(DEVFL_UP | DEVFL_DEAD); + spin_unlock_irqrestore(&d->lock, flags); /* clean out active and to-be-retransmitted buffers */ for (i = 0; i < NFACTIVE; i++) { From a85b8544d46390469b6ca72d6bfd3ecb7be985ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 14 Jun 2025 00:30:37 +0200 Subject: [PATCH 346/497] wifi: remove zero-length arrays All of these are really meant to be variable-length, and in the case of s1g_beacon it's actually accessed. Make that one in particular, and a couple of others (that aren't used as arrays now), actually variable. Reported-by: syzbot+fd222bb38e916df26fa4@syzkaller.appspotmail.com Fixes: 1e1f706fc2ce ("wifi: cfg80211/mac80211: correctly parse S1G beacon optional elements") Link: https://patch.msgid.link/20250614003037.a3e82e882251.I2e8b58e56ff2a9f8b06c66f036578b7c1d4e4685@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index ce377f7fb912..22f39e5e2ff1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1278,7 +1278,7 @@ struct ieee80211_ext { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; - u8 variable[0]; + u8 variable[]; } __packed s1g_beacon; } u; } __packed __aligned(2); @@ -1536,7 +1536,7 @@ struct ieee80211_mgmt { u8 action_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed tdls_discover_resp; struct { u8 action_code; @@ -1721,35 +1721,35 @@ struct ieee80211_tdls_data { struct { u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_req; struct { __le16 status_code; u8 dialog_token; __le16 capability; - u8 variable[0]; + u8 variable[]; } __packed setup_resp; struct { __le16 status_code; u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed setup_cfm; struct { __le16 reason_code; - u8 variable[0]; + u8 variable[]; } __packed teardown; struct { u8 dialog_token; - u8 variable[0]; + u8 variable[]; } __packed discover_req; struct { u8 target_channel; u8 oper_class; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_req; struct { __le16 status_code; - u8 variable[0]; + u8 variable[]; } __packed chan_switch_resp; } u; } __packed; From d1b1a5eb27c4948e8811cf4dbb05aaf3eb10700c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Jun 2025 17:18:38 +0200 Subject: [PATCH 347/497] wifi: mac80211: drop invalid source address OCB frames In OCB, don't accept frames from invalid source addresses (and in particular don't try to create stations for them), drop the frames instead. Reported-by: syzbot+8b512026a7ec10dcbdd9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/6788d2d9.050a0220.20d369.0028.GAE@google.com/ Signed-off-by: Johannes Berg Tested-by: syzbot+8b512026a7ec10dcbdd9@syzkaller.appspotmail.com Link: https://patch.msgid.link/20250616171838.7433379cab5d.I47444d63c72a0bd58d2e2b67bb99e1fea37eec6f@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 09beb65d6108..e73431549ce7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4432,6 +4432,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) if (!multicast && !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1)) return false; + /* reject invalid/our STA address */ + if (!is_valid_ether_addr(hdr->addr2) || + ether_addr_equal(sdata->dev->dev_addr, hdr->addr2)) + return false; if (!rx->sta) { int rate_idx; if (status->encoding != RX_ENC_LEGACY) From d19bac3d4edc4ab92e90d1c6bf68620192254b4b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Jun 2025 10:49:01 +0200 Subject: [PATCH 348/497] wifi: mac80211: don't WARN for late channel/color switch There's really no value in the WARN stack trace etc., the reason for this happening isn't directly related to the calling function anyway. Also, syzbot has been observing it constantly, and there's no way we can resolve it there - those systems are just slow. Instead print an error message (once) and add a comment about what really causes this message. Reported-by: syzbot+468656785707b0e995df@syzkaller.appspotmail.com Reported-by: syzbot+18c783c5cf6a781e3e2c@syzkaller.appspotmail.com Reported-by: syzbot+d5924d5cffddfccab68e@syzkaller.appspotmail.com Reported-by: syzbot+7d73d99525d1ff7752ef@syzkaller.appspotmail.com Reported-by: syzbot+8e6e002c74d1927edaf5@syzkaller.appspotmail.com Reported-by: syzbot+97254a3b10c541879a65@syzkaller.appspotmail.com Reported-by: syzbot+dfd1fd46a1960ad9c6ec@syzkaller.appspotmail.com Reported-by: syzbot+85e0b8d12d9ca877d806@syzkaller.appspotmail.com Link: https://patch.msgid.link/20250617104902.146e10919be1.I85f352ca4a2dce6f556e5ff45ceaa5f3769cb5ce@changeid Signed-off-by: Johannes Berg --- net/mac80211/debug.h | 5 ++++- net/mac80211/tx.c | 29 +++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 5b81998cb0c9..ef7c1a68d88d 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -1,10 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions - * Copyright (C) 2022 - 2024 Intel Corporation + * Copyright (C) 2022 - 2025 Intel Corporation */ #ifndef __MAC80211_DEBUG_H #define __MAC80211_DEBUG_H +#include #include #ifdef CONFIG_MAC80211_OCB_DEBUG @@ -152,6 +153,8 @@ do { \ else \ _sdata_err((link)->sdata, fmt, ##__VA_ARGS__); \ } while (0) +#define link_err_once(link, fmt, ...) \ + DO_ONCE_LITE(link_err, link, fmt, ##__VA_ARGS__) #define link_id_info(sdata, link_id, fmt, ...) \ do { \ if (ieee80211_vif_is_mld(&sdata->vif)) \ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d8d4f3d7d7f2..d58b80813bdd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * Transmit and frame generation functions. */ @@ -5016,12 +5016,25 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, } } -static u8 __ieee80211_beacon_update_cntdwn(struct beacon_data *beacon) +static u8 __ieee80211_beacon_update_cntdwn(struct ieee80211_link_data *link, + struct beacon_data *beacon) { - beacon->cntdwn_current_counter--; + if (beacon->cntdwn_current_counter == 1) { + /* + * Channel switch handling is done by a worker thread while + * beacons get pulled from hardware timers. It's therefore + * possible that software threads are slow enough to not be + * able to complete CSA handling in a single beacon interval, + * in which case we get here. There isn't much to do about + * it, other than letting the user know that the AP isn't + * behaving correctly. + */ + link_err_once(link, + "beacon TX faster than countdown (channel/color switch) completion\n"); + return 0; + } - /* the counter should never reach 0 */ - WARN_ON_ONCE(!beacon->cntdwn_current_counter); + beacon->cntdwn_current_counter--; return beacon->cntdwn_current_counter; } @@ -5052,7 +5065,7 @@ u8 ieee80211_beacon_update_cntdwn(struct ieee80211_vif *vif, unsigned int link_i if (!beacon) goto unlock; - count = __ieee80211_beacon_update_cntdwn(beacon); + count = __ieee80211_beacon_update_cntdwn(link, beacon); unlock: rcu_read_unlock(); @@ -5450,7 +5463,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (beacon->cntdwn_counter_offsets[0]) { if (!is_template) - __ieee80211_beacon_update_cntdwn(beacon); + __ieee80211_beacon_update_cntdwn(link, beacon); ieee80211_set_beacon_cntdwn(sdata, beacon, link); } @@ -5482,7 +5495,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, * for now we leave it consistent with overall * mac80211's behavior. */ - __ieee80211_beacon_update_cntdwn(beacon); + __ieee80211_beacon_update_cntdwn(link, beacon); ieee80211_set_beacon_cntdwn(sdata, beacon, link); } From 91a7703a036b146481b8a0bd6efa6200d296ca5d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Jun 2025 06:41:48 -0600 Subject: [PATCH 349/497] io_uring: remove duplicate io_uring_alloc_task_context() definition This function exists in both tctx.h (where it belongs) and in io_uring.h as a remnant of before the tctx handling code got split out. Remove the io_uring.h definition and ensure that sqpoll.c includes the tctx.h header to get the definition. Signed-off-by: Jens Axboe --- io_uring/io_uring.h | 2 -- io_uring/sqpoll.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index d59c12277d58..66c1ca73f55e 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -98,8 +98,6 @@ struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *coun struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count); void tctx_task_work(struct callback_head *cb); __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); -int io_uring_alloc_task_context(struct task_struct *task, - struct io_ring_ctx *ctx); int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file, int start, int end); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 268d2fbe6160..fa5a6750ee52 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -16,6 +16,7 @@ #include #include "io_uring.h" +#include "tctx.h" #include "napi.h" #include "sqpoll.h" From f2320f1dd6f6f82cb2c7aff23a12bab537bdea89 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Jun 2025 06:43:18 -0600 Subject: [PATCH 350/497] io_uring/sqpoll: don't put task_struct on tctx setup failure A recent commit moved the error handling of sqpoll thread and tctx failures into the thread itself, as part of fixing an issue. However, it missed that tctx allocation may also fail, and that io_sq_offload_create() does its own error handling for the task_struct in that case. Remove the manual task putting in io_sq_offload_create(), as io_sq_thread() will notice that the tctx did not get setup and hence it should put itself and exit. Reported-by: syzbot+763e12bbf004fb1062e4@syzkaller.appspotmail.com Fixes: ac0b8b327a56 ("io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()") Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index fa5a6750ee52..a3f11349ce06 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -420,7 +420,6 @@ void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) __cold int io_sq_offload_create(struct io_ring_ctx *ctx, struct io_uring_params *p) { - struct task_struct *task_to_put = NULL; int ret; /* Retain compatibility with failing for an invalid attach attempt */ @@ -499,7 +498,7 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, rcu_assign_pointer(sqd->thread, tsk); mutex_unlock(&sqd->lock); - task_to_put = get_task_struct(tsk); + get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); if (ret) @@ -514,8 +513,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, complete(&ctx->sq_data->exited); err: io_sq_thread_finish(ctx); - if (task_to_put) - put_task_struct(task_to_put); return ret; } From 7b4ac12cc929e281cf7edc22203e0533790ebc2b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 13 Jun 2025 14:36:29 +0200 Subject: [PATCH 351/497] openvswitch: Allocate struct ovs_pcpu_storage dynamically PERCPU_MODULE_RESERVE defines the maximum size that can by used for the per-CPU data size used by modules. This is 8KiB. Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into one") restructured the per-CPU memory allocation for the module and moved the separate alloc_percpu() invocations at module init time to a static per-CPU variable which is allocated by the module loader. The size of the per-CPU data section for openvswitch is 6488 bytes which is ~80% of the available per-CPU memory. Together with a few other modules it is easy to exhaust the available 8KiB of memory. Allocate ovs_pcpu_storage dynamically at module init time. Reported-by: Gal Pressman Closes: https://lore.kernel.org/all/c401e017-f8db-4f57-a1cd-89beb979a277@nvidia.com Fixes: 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into one") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Aaron Conole Link: https://patch.msgid.link/20250613123629.-XSoQTCu@linutronix.de Signed-off-by: Paolo Abeni --- net/openvswitch/actions.c | 23 +++++++++------------ net/openvswitch/datapath.c | 42 +++++++++++++++++++++++++++++++------- net/openvswitch/datapath.h | 3 ++- 3 files changed, 47 insertions(+), 21 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e7269a3eec79..3add108340bf 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -39,16 +39,14 @@ #include "flow_netlink.h" #include "openvswitch_trace.h" -DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = { - .bh_lock = INIT_LOCAL_LOCK(bh_lock), -}; +struct ovs_pcpu_storage __percpu *ovs_pcpu_storage; /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' * space. Return NULL if out of key spaces. */ static struct sw_flow_key *clone_key(const struct sw_flow_key *key_) { - struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage); struct action_flow_keys *keys = &ovs_pcpu->flow_keys; int level = ovs_pcpu->exec_level; struct sw_flow_key *key = NULL; @@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb, const struct nlattr *actions, const int actions_len) { - struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos); struct deferred_action *da; da = action_fifo_put(fifo); @@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); + struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data); struct vport *vport = data->vport; if (skb_cow_head(skb, data->l2_len) < 0) { @@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb, unsigned int hlen = skb_network_offset(skb); struct ovs_frag_data *data; - data = this_cpu_ptr(&ovs_pcpu_storage.frag_data); + data = this_cpu_ptr(&ovs_pcpu_storage->frag_data); data->dst = skb->_skb_refdst; data->vport = vport; data->cb = *OVS_CB(skb); @@ -1566,16 +1564,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, clone = clone_flow_key ? clone_key(key) : key; if (clone) { int err = 0; - if (actions) { /* Sample action */ if (clone_flow_key) - __this_cpu_inc(ovs_pcpu_storage.exec_level); + __this_cpu_inc(ovs_pcpu_storage->exec_level); err = do_execute_actions(dp, skb, clone, actions, len); if (clone_flow_key) - __this_cpu_dec(ovs_pcpu_storage.exec_level); + __this_cpu_dec(ovs_pcpu_storage->exec_level); } else { /* Recirc action */ clone->recirc_id = recirc_id; ovs_dp_process_packet(skb, clone); @@ -1611,7 +1608,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb, static void process_deferred_actions(struct datapath *dp) { - struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos); + struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos); /* Do not touch the FIFO in case there is no deferred actions. */ if (action_fifo_is_empty(fifo)) @@ -1642,7 +1639,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, { int err, level; - level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level); + level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level); if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); @@ -1659,6 +1656,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, process_deferred_actions(dp); out: - __this_cpu_dec(ovs_pcpu_storage.exec_level); + __this_cpu_dec(ovs_pcpu_storage->exec_level); return err; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6a304ae2d959..b990dc83504f 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p) /* Must be called with rcu_read_lock. */ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) { - struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage); const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; @@ -299,7 +299,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) * avoided. */ if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) { - local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); ovs_pcpu->owner = current; ovs_pcpu_locked = true; } @@ -310,7 +310,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_dp_name(dp), error); if (ovs_pcpu_locked) { ovs_pcpu->owner = NULL; - local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); } stats_counter = &stats->n_hit; @@ -689,13 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); - local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + local_lock_nested_bh(&ovs_pcpu_storage->bh_lock); if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_write(ovs_pcpu_storage.owner, current); + this_cpu_write(ovs_pcpu_storage->owner, current); err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); if (IS_ENABLED(CONFIG_PREEMPT_RT)) - this_cpu_write(ovs_pcpu_storage.owner, NULL); - local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + this_cpu_write(ovs_pcpu_storage->owner, NULL); + local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock); local_bh_enable(); rcu_read_unlock(); @@ -2744,6 +2744,28 @@ static struct drop_reason_list drop_reason_list_ovs = { .n_reasons = ARRAY_SIZE(ovs_drop_reasons), }; +static int __init ovs_alloc_percpu_storage(void) +{ + unsigned int cpu; + + ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage); + if (!ovs_pcpu_storage) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + struct ovs_pcpu_storage *ovs_pcpu; + + ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu); + local_lock_init(&ovs_pcpu->bh_lock); + } + return 0; +} + +static void ovs_free_percpu_storage(void) +{ + free_percpu(ovs_pcpu_storage); +} + static int __init dp_init(void) { int err; @@ -2753,6 +2775,10 @@ static int __init dp_init(void) pr_info("Open vSwitch switching datapath\n"); + err = ovs_alloc_percpu_storage(); + if (err) + goto error; + err = ovs_internal_dev_rtnl_link_register(); if (err) goto error; @@ -2799,6 +2825,7 @@ static int __init dp_init(void) error_unreg_rtnl_link: ovs_internal_dev_rtnl_link_unregister(); error: + ovs_free_percpu_storage(); return err; } @@ -2813,6 +2840,7 @@ static void dp_cleanup(void) ovs_vport_exit(); ovs_flow_exit(); ovs_internal_dev_rtnl_link_unregister(); + ovs_free_percpu_storage(); } module_init(dp_init); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 1b5348b0f559..cfeb817a1889 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -220,7 +220,8 @@ struct ovs_pcpu_storage { struct task_struct *owner; local_lock_t bh_lock; }; -DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage); + +extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage; /** * enum ovs_pkt_hash_types - hash info to include with a packet From db22720545207f734aaa9d9f71637bfc8b0155e0 Mon Sep 17 00:00:00 2001 From: Brett Werling Date: Thu, 12 Jun 2025 14:18:25 -0500 Subject: [PATCH 352/497] can: tcan4x5x: fix power regulator retrieval during probe Fixes the power regulator retrieval in tcan4x5x_can_probe() by ensuring the regulator pointer is not set to NULL in the successful return from devm_regulator_get_optional(). Fixes: 3814ca3a10be ("can: tcan4x5x: tcan4x5x_can_probe(): turn on the power before parsing the config") Signed-off-by: Brett Werling Link: https://patch.msgid.link/20250612191825.3646364-1-brett.werling@garmin.com Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x-core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index e5c162f8c589..8edaa339d590 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -411,10 +411,11 @@ static int tcan4x5x_can_probe(struct spi_device *spi) priv = cdev_to_priv(mcan_class); priv->power = devm_regulator_get_optional(&spi->dev, "vsup"); - if (PTR_ERR(priv->power) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto out_m_can_class_free_dev; - } else { + if (IS_ERR(priv->power)) { + if (PTR_ERR(priv->power) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto out_m_can_class_free_dev; + } priv->power = NULL; } From 5d3bc9e5e725aa36cca9b794e340057feb6880b4 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Tue, 20 May 2025 22:36:56 +0530 Subject: [PATCH 353/497] net: ice: Perform accurate aRFS flow match This patch fixes an issue seen in a large-scale deployment under heavy incoming pkts where the aRFS flow wrongly matches a flow and reprograms the NIC with wrong settings. That mis-steering causes RX-path latency spikes and noisy neighbor effects when many connections collide on the same hash (some of our production servers have 20-30K connections). set_rps_cpu() calls ndo_rx_flow_steer() with flow_id that is calculated by hashing the skb sized by the per rx-queue table size. This results in multiple connections (even across different rx-queues) getting the same hash value. The driver steer function modifies the wrong flow to use this rx-queue, e.g.: Flow#1 is first added: Flow#1: , Hash 'h', q#10 Later when a new flow needs to be added: Flow#2: , Hash 'h', q#20 The driver finds the hash 'h' from Flow#1 and updates it to use q#20. This results in both flows getting un-optimized - packets for Flow#1 goes to q#20, and then reprogrammed back to q#10 later and so on; and Flow #2 programming is never done as Flow#1 is matched first for all misses. Many flows may wrongly share the same hash and reprogram rules of the original flow each with their own q#. Tested on two 144-core servers with 16K netperf sessions for 180s. Netperf clients are pinned to cores 0-71 sequentially (so that wrong packets on q#s 72-143 can be measured). IRQs are set 1:1 for queues -> CPUs, enable XPS, enable aRFS (global value is 144 * rps_flow_cnt). Test notes about results from ice_rx_flow_steer(): --------------------------------------------------- 1. "Skip:" counter increments here: if (fltr_info->q_index == rxq_idx || arfs_entry->fltr_state != ICE_ARFS_ACTIVE) goto out; 2. "Add:" counter increments here: ret = arfs_entry->fltr_info.fltr_id; INIT_HLIST_NODE(&arfs_entry->list_entry); 3. "Update:" counter increments here: /* update the queue to forward to on an already existing flow */ Runtime comparison: original code vs with the patch for different rps_flow_cnt values. +-------------------------------+--------------+--------------+ | rps_flow_cnt | 512 | 2048 | +-------------------------------+--------------+--------------+ | Ratio of Pkts on Good:Bad q's | 214 vs 822K | 1.1M vs 980K | | Avoid wrong aRFS programming | 0 vs 310K | 0 vs 30K | | CPU User | 216 vs 183 | 216 vs 206 | | CPU System | 1441 vs 1171 | 1447 vs 1320 | | CPU Softirq | 1245 vs 920 | 1238 vs 961 | | CPU Total | 29 vs 22.7 | 29 vs 24.9 | | aRFS Update | 533K vs 59 | 521K vs 32 | | aRFS Skip | 82M vs 77M | 7.2M vs 4.5M | +-------------------------------+--------------+--------------+ A separate TCP_STREAM and TCP_RR with 1,4,8,16,64,128,256,512 connections showed no performance degradation. Some points on the patch/aRFS behavior: 1. Enabling full tuple matching ensures flows are always correctly matched, even with smaller hash sizes. 2. 5-6% drop in CPU utilization as the packets arrive at the correct CPUs and fewer calls to driver for programming on misses. 3. Larger hash tables reduces mis-steering due to more unique flow hashes, but still has clashes. However, with larger per-device rps_flow_cnt, old flows take more time to expire and new aRFS flows cannot be added if h/w limits are reached (rps_may_expire_flow() succeeds when 10*rps_flow_cnt pkts have been processed by this cpu that are not part of the flow). Fixes: 28bf26724fdb0 ("ice: Implement aRFS") Signed-off-by: Krishna Kumar Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_arfs.c | 48 +++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 2bc5c7f59844..1f7834c03550 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -377,6 +377,50 @@ ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto) return false; } +/** + * ice_arfs_cmp - Check if aRFS filter matches this flow. + * @fltr_info: filter info of the saved ARFS entry. + * @fk: flow dissector keys. + * @n_proto: One of htons(ETH_P_IP) or htons(ETH_P_IPV6). + * @ip_proto: One of IPPROTO_TCP or IPPROTO_UDP. + * + * Since this function assumes limited values for n_proto and ip_proto, it + * is meant to be called only from ice_rx_flow_steer(). + * + * Return: + * * true - fltr_info refers to the same flow as fk. + * * false - fltr_info and fk refer to different flows. + */ +static bool +ice_arfs_cmp(const struct ice_fdir_fltr *fltr_info, const struct flow_keys *fk, + __be16 n_proto, u8 ip_proto) +{ + /* Determine if the filter is for IPv4 or IPv6 based on flow_type, + * which is one of ICE_FLTR_PTYPE_NONF_IPV{4,6}_{TCP,UDP}. + */ + bool is_v4 = fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP || + fltr_info->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP; + + /* Following checks are arranged in the quickest and most discriminative + * fields first for early failure. + */ + if (is_v4) + return n_proto == htons(ETH_P_IP) && + fltr_info->ip.v4.src_port == fk->ports.src && + fltr_info->ip.v4.dst_port == fk->ports.dst && + fltr_info->ip.v4.src_ip == fk->addrs.v4addrs.src && + fltr_info->ip.v4.dst_ip == fk->addrs.v4addrs.dst && + fltr_info->ip.v4.proto == ip_proto; + + return fltr_info->ip.v6.src_port == fk->ports.src && + fltr_info->ip.v6.dst_port == fk->ports.dst && + fltr_info->ip.v6.proto == ip_proto && + !memcmp(&fltr_info->ip.v6.src_ip, &fk->addrs.v6addrs.src, + sizeof(struct in6_addr)) && + !memcmp(&fltr_info->ip.v6.dst_ip, &fk->addrs.v6addrs.dst, + sizeof(struct in6_addr)); +} + /** * ice_rx_flow_steer - steer the Rx flow to where application is being run * @netdev: ptr to the netdev being adjusted @@ -448,6 +492,10 @@ ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb, continue; fltr_info = &arfs_entry->fltr_info; + + if (!ice_arfs_cmp(fltr_info, &fk, n_proto, ip_proto)) + continue; + ret = fltr_info->fltr_id; if (fltr_info->q_index == rxq_idx || From 48c8b214974dc55283bd5f12e3a483b27c403bbc Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Fri, 16 May 2025 15:09:07 +0200 Subject: [PATCH 354/497] ice: fix eswitch code memory leak in reset scenario Add simple eswitch mode checker in attaching VF procedure and allocate required port representor memory structures only in switchdev mode. The reset flows triggers VF (if present) detach/attach procedure. It might involve VF port representor(s) re-creation if the device is configured is switchdev mode (not legacy one). The memory was blindly allocated in current implementation, regardless of the mode and not freed if in legacy mode. Kmemeleak trace: unreferenced object (percpu) 0x7e3bce5b888458 (size 40): comm "bash", pid 1784, jiffies 4295743894 hex dump (first 32 bytes on cpu 45): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 0): pcpu_alloc_noprof+0x4c4/0x7c0 ice_repr_create+0x66/0x130 [ice] ice_repr_create_vf+0x22/0x70 [ice] ice_eswitch_attach_vf+0x1b/0xa0 [ice] ice_reset_all_vfs+0x1dd/0x2f0 [ice] ice_pci_err_resume+0x3b/0xb0 [ice] pci_reset_function+0x8f/0x120 reset_store+0x56/0xa0 kernfs_fop_write_iter+0x120/0x1b0 vfs_write+0x31c/0x430 ksys_write+0x61/0xd0 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x76/0x7e Testing hints (ethX is PF netdev): - create at least one VF echo 1 > /sys/class/net/ethX/device/sriov_numvfs - trigger the reset echo 1 > /sys/class/net/ethX/device/reset Fixes: 415db8399d06 ("ice: make representor code generic") Signed-off-by: Grzegorz Nitka Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 6aae03771746..2e4f0969035f 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -508,10 +508,14 @@ ice_eswitch_attach(struct ice_pf *pf, struct ice_repr *repr, unsigned long *id) */ int ice_eswitch_attach_vf(struct ice_pf *pf, struct ice_vf *vf) { - struct ice_repr *repr = ice_repr_create_vf(vf); struct devlink *devlink = priv_to_devlink(pf); + struct ice_repr *repr; int err; + if (!ice_is_eswitch_mode_switchdev(pf)) + return 0; + + repr = ice_repr_create_vf(vf); if (IS_ERR(repr)) return PTR_ERR(repr); From 688a0d61b2d7427189c4eb036ce485d8fc957cbb Mon Sep 17 00:00:00 2001 From: Vitaly Lifshits Date: Sun, 25 May 2025 11:38:43 +0300 Subject: [PATCH 355/497] e1000e: set fixed clock frequency indication for Nahum 11 and Nahum 13 On some systems with Nahum 11 and Nahum 13 the value of the XTAL clock in the software STRAP is incorrect. This causes the PTP timer to run at the wrong rate and can lead to synchronization issues. The STRAP value is configured by the system firmware, and a firmware update is not always possible. Since the XTAL clock on these systems always runs at 38.4MHz, the driver may ignore the STRAP and just set the correct value. Fixes: cc23f4f0b6b9 ("e1000e: Add support for Meteor Lake") Signed-off-by: Vitaly Lifshits Tested-by: Mor Bar-Gabay Reviewed-by: Gil Fine Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 14 +++++++++++--- drivers/net/ethernet/intel/e1000e/ptp.c | 8 +++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index a96f4cfa6e17..7719e15813ee 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -3534,9 +3534,6 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: - case e1000_pch_mtp: - case e1000_pch_lnp: - case e1000_pch_ptp: case e1000_pch_nvp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) { /* Stable 24MHz frequency */ @@ -3552,6 +3549,17 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca) adapter->cc.shift = shift; } break; + case e1000_pch_mtp: + case e1000_pch_lnp: + case e1000_pch_ptp: + /* System firmware can misreport this value, so set it to a + * stable 38400KHz frequency. + */ + incperiod = INCPERIOD_38400KHZ; + incvalue = INCVALUE_38400KHZ; + shift = INCVALUE_SHIFT_38400KHZ; + adapter->cc.shift = shift; + break; case e1000_82574: case e1000_82583: /* Stable 25MHz frequency */ diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 89d57dd911dc..ea3c3eb2ef20 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -295,15 +295,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: - case e1000_pch_mtp: - case e1000_pch_lnp: - case e1000_pch_ptp: case e1000_pch_nvp: if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; else adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; break; + case e1000_pch_mtp: + case e1000_pch_lnp: + case e1000_pch_ptp: + adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; + break; case e1000_82574: case e1000_82583: adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; From c50784e99f0e7199cdb12dbddf02229b102744ef Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Jun 2025 13:23:07 -1000 Subject: [PATCH 356/497] sched_ext: Make scx_group_set_weight() always update tg->scx.weight Otherwise, tg->scx.weight can go out of sync while scx_cgroup is not enabled and ops.cgroup_init() may be called with a stale weight value. Signed-off-by: Tejun Heo Fixes: 819513666966 ("sched_ext: Add cgroup support") Cc: stable@vger.kernel.org # v6.12+ --- kernel/sched/ext.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2c41c78be61e..33a0d8c6ff95 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4241,12 +4241,12 @@ void scx_group_set_weight(struct task_group *tg, unsigned long weight) percpu_down_read(&scx_cgroup_rwsem); - if (scx_cgroup_enabled && tg->scx_weight != weight) { - if (SCX_HAS_OP(sch, cgroup_set_weight)) - SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL, - tg_cgrp(tg), weight); - tg->scx_weight = weight; - } + if (scx_cgroup_enabled && SCX_HAS_OP(sch, cgroup_set_weight) && + tg->scx_weight != weight) + SCX_CALL_OP(sch, SCX_KF_UNLOCKED, cgroup_set_weight, NULL, + tg_cgrp(tg), weight); + + tg->scx_weight = weight; percpu_up_read(&scx_cgroup_rwsem); } From 33796b91871ad4010c8188372dd1faf97cf0f1c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Jun 2025 10:13:25 -1000 Subject: [PATCH 357/497] sched_ext, sched/core: Don't call scx_group_set_weight() prematurely from sched_create_group() During task_group creation, sched_create_group() calls scx_group_set_weight() with CGROUP_WEIGHT_DFL to initialize the sched_ext portion. This is premature and ends up calling ops.cgroup_set_weight() with an incorrect @cgrp before ops.cgroup_init() is called. sched_create_group() should just initialize SCX related fields in the new task_group. Fix it by factoring out scx_tg_init() from sched_init() and making sched_create_group() call that function instead of scx_group_set_weight(). v2: Retain CONFIG_EXT_GROUP_SCHED ifdef in sched_init() as removing it leads to build failures on !CONFIG_GROUP_SCHED configs. Signed-off-by: Tejun Heo Fixes: 819513666966 ("sched_ext: Add cgroup support") Cc: stable@vger.kernel.org # v6.12+ --- kernel/sched/core.c | 4 ++-- kernel/sched/ext.c | 5 +++++ kernel/sched/ext.h | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dce50fa57471..8988d38d46a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8545,7 +8545,7 @@ void __init sched_init(void) init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL); #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_EXT_GROUP_SCHED - root_task_group.scx_weight = CGROUP_WEIGHT_DFL; + scx_tg_init(&root_task_group); #endif /* CONFIG_EXT_GROUP_SCHED */ #ifdef CONFIG_RT_GROUP_SCHED root_task_group.rt_se = (struct sched_rt_entity **)ptr; @@ -8985,7 +8985,7 @@ struct task_group *sched_create_group(struct task_group *parent) if (!alloc_rt_sched_group(tg, parent)) goto err; - scx_group_set_weight(tg, CGROUP_WEIGHT_DFL); + scx_tg_init(tg); alloc_uclamp_sched_group(tg, parent); return tg; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 33a0d8c6ff95..b498d867ba21 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4092,6 +4092,11 @@ bool scx_can_stop_tick(struct rq *rq) DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); static bool scx_cgroup_enabled; +void scx_tg_init(struct task_group *tg) +{ + tg->scx_weight = CGROUP_WEIGHT_DFL; +} + int scx_tg_online(struct task_group *tg) { struct scx_sched *sch = scx_root; diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 6e5072f57771..a75835c23f15 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -79,6 +79,7 @@ static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {} #ifdef CONFIG_CGROUP_SCHED #ifdef CONFIG_EXT_GROUP_SCHED +void scx_tg_init(struct task_group *tg); int scx_tg_online(struct task_group *tg); void scx_tg_offline(struct task_group *tg); int scx_cgroup_can_attach(struct cgroup_taskset *tset); @@ -88,6 +89,7 @@ void scx_cgroup_cancel_attach(struct cgroup_taskset *tset); void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); void scx_group_set_idle(struct task_group *tg, bool idle); #else /* CONFIG_EXT_GROUP_SCHED */ +static inline void scx_tg_init(struct task_group *tg) {} static inline int scx_tg_online(struct task_group *tg) { return 0; } static inline void scx_tg_offline(struct task_group *tg) {} static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } From 261dce3d64021e7ec828a17b4975ce9182e54ceb Mon Sep 17 00:00:00 2001 From: Chuyi Zhou Date: Tue, 17 Jun 2025 12:42:16 +0800 Subject: [PATCH 358/497] workqueue: Initialize wq_isolated_cpumask in workqueue_init_early() Now when isolcpus is enabled via the cmdline, wq_isolated_cpumask does not include these isolated CPUs, even wq_unbound_cpumask has already excluded them. It is only when we successfully configure an isolate cpuset partition that wq_isolated_cpumask gets overwritten by workqueue_unbound_exclude_cpumask(), including both the cmdline-specified isolated CPUs and the isolated CPUs within the cpuset partitions. Fix this issue by initializing wq_isolated_cpumask properly in workqueue_init_early(). Fixes: fe28f631fa94 ("workqueue: Add workqueue_unbound_exclude_cpumask() to exclude CPUs from wq_unbound_cpumask") Signed-off-by: Chuyi Zhou Reviewed-by: Waiman Long Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 97f37b5bae66..9f9148075828 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -7767,7 +7767,8 @@ void __init workqueue_init_early(void) restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask); cpumask_copy(wq_requested_unbound_cpumask, wq_unbound_cpumask); - + cpumask_andnot(wq_isolated_cpumask, cpu_possible_mask, + housekeeping_cpumask(HK_TYPE_DOMAIN)); pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); unbound_wq_update_pwq_attrs_buf = alloc_workqueue_attrs(); From bb6b4143503750318a3f85975186db899a3caaf2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 18:08:40 -0300 Subject: [PATCH 359/497] tools arch amd ibs: Sync ibs.h with the kernel sources To pick up the changes from: 861c6b1185fbb2e3 ("x86/platform/amd: Add standard header guards to ") A small change to tools/perf/check-headers.sh was made to cope with the move of this header done in: 3846389c03a85188 ("x86/platform/amd: Move the header to ") That don't result in any changes in the tools, just address this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/amd/ibs.h arch/x86/include/asm/amd/ibs.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/aEtCi0pup5FEwnzn@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/amd/ibs.h | 5 +++++ tools/perf/check-headers.sh | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/amd/ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index 300b6e0765b2..cbce54fec7b9 100644 --- a/tools/arch/x86/include/asm/amd/ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + /* * From PPR Vol 1 for AMD Family 19h Model 01h B1 * 55898 Rev 0.35 - Feb 5, 2021 @@ -151,3 +154,5 @@ struct perf_ibs_data { }; u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; }; + +#endif /* _ASM_X86_AMD_IBS_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index e9fab20e9330..8085e4d1d8af 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -186,7 +186,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include "' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' -check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include " -I "^#include " -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' From fc92099902fbf21000554678a47654b029c15a4d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 Jun 2025 12:36:06 -0300 Subject: [PATCH 360/497] tools headers: Synchronize linux/bits.h with the kernel sources To pick up the changes in this cset: 1e7933a575ed8af4 ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") 5b572e8a9f3dcd6e ("bits: introduce fixed-type BIT_U*()") 19408200c094858d ("bits: introduce fixed-type GENMASK_U*()") 31299a5e02112411 ("bits: add comments and newlines to #if, #else and #endif directives") This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h Please see tools/include/uapi/README for further details. Acked-by: Vincent Mailhol Cc: I Hsin Cheng Cc: Yury Norov Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Lucas De Marchi Cc: Namhyung Kim Cc: Yury Norov Link: https://lore.kernel.org/r/aEr0ZJ60EbshEy6p@x1 Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/bits.h | 4 +-- tools/include/linux/bits.h | 57 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/include/uapi/linux/bits.h b/include/uapi/linux/bits.h index a04afef9efca..682b406e1067 100644 --- a/include/uapi/linux/bits.h +++ b/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 14fd0ca9a6cd..7ad056219115 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -12,6 +12,7 @@ #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,16 +20,68 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include #include +#include + #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) -#else + +/* + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 + */ +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) + +/* + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: + * + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) + */ +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ #define GENMASK_INPUT_CHECK(h, l) 0 -#endif + +#endif /* !defined(__ASSEMBLY__) */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) From ebec62bc7ec435b475722a5467d67c720a1ad79f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 13 Jun 2025 17:41:05 -0700 Subject: [PATCH 361/497] perf evsel: Missed close() when probing hybrid core PMUs Add missing close() to avoid leaking perf events. In past perfs this mattered little as the function was just used by 'perf list'. As the function is now used to detect hybrid PMUs leaking the perf event is somewhat more painful. Fixes: b41f1cec91c37eee ("perf list: Skip unsupported events") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiapeng Chong Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tiezhu Yang Link: https://lore.kernel.org/r/20250614004108.1650988-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/print-events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index a786cbfb0ff5..83aaf7cda635 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config) ret = evsel__open(evsel, NULL, tmap) >= 0; } + evsel__close(evsel); evsel__delete(evsel); } From a89f5fae998bdc4d0505306f93844c9ae059d50c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 10 Jun 2025 18:52:56 +0900 Subject: [PATCH 362/497] ksmbd: add free_transport ops in ksmbd connection free_transport function for tcp connection can be called from smbdirect. It will cause kernel oops. This patch add free_transport ops in ksmbd connection, and add each free_transports for tcp and smbdirect. Fixes: 21a4e47578d4 ("ksmbd: fix use-after-free in __smb2_lease_break_noti()") Reviewed-by: Stefan Metzmacher Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/connection.c | 2 +- fs/smb/server/connection.h | 1 + fs/smb/server/transport_rdma.c | 10 ++++++++-- fs/smb/server/transport_tcp.c | 3 ++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 83764c230e9d..3f04a2977ba8 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -40,7 +40,7 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) kvfree(conn->request_buf); kfree(conn->preauth_info); if (atomic_dec_and_test(&conn->refcnt)) { - ksmbd_free_transport(conn->transport); + conn->transport->ops->free_transport(conn->transport); kfree(conn); } } diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 6efed923bd68..dd3e0e3f7bf0 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -133,6 +133,7 @@ struct ksmbd_transport_ops { void *buf, unsigned int len, struct smb2_buffer_desc_v1 *desc, unsigned int desc_len); + void (*free_transport)(struct ksmbd_transport *kt); }; struct ksmbd_transport { diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 4998df04ab95..64a428a06ace 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -159,7 +159,8 @@ struct smb_direct_transport { }; #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) - +#define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ + struct smb_direct_transport, transport)) enum { SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, SMB_DIRECT_MSG_DATA_TRANSFER @@ -410,6 +411,11 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) return NULL; } +static void smb_direct_free_transport(struct ksmbd_transport *kt) +{ + kfree(SMBD_TRANS(kt)); +} + static void free_transport(struct smb_direct_transport *t) { struct smb_direct_recvmsg *recvmsg; @@ -455,7 +461,6 @@ static void free_transport(struct smb_direct_transport *t) smb_direct_destroy_pools(t); ksmbd_conn_free(KSMBD_TRANS(t)->conn); - kfree(t); } static struct smb_direct_sendmsg @@ -2281,4 +2286,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .read = smb_direct_read, .rdma_read = smb_direct_rdma_read, .rdma_write = smb_direct_rdma_write, + .free_transport = smb_direct_free_transport, }; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index abedf510899a..4e9f98db9ff4 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -93,7 +93,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return t; } -void ksmbd_free_transport(struct ksmbd_transport *kt) +static void ksmbd_tcp_free_transport(struct ksmbd_transport *kt) { struct tcp_transport *t = TCP_TRANS(kt); @@ -656,4 +656,5 @@ static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { .read = ksmbd_tcp_read, .writev = ksmbd_tcp_writev, .disconnect = ksmbd_tcp_disconnect, + .free_transport = ksmbd_tcp_free_transport, }; From 7ac5b66acafcc9292fb935d7e03790f2b8b2dc0e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 13 Jun 2025 10:12:43 +0900 Subject: [PATCH 363/497] ksmbd: fix null pointer dereference in destroy_previous_session If client set ->PreviousSessionId on kerberos session setup stage, NULL pointer dereference error will happen. Since sess->user is not set yet, It can pass the user argument as NULL to destroy_previous_session. sess->user will be set in ksmbd_krb5_authenticate(). So this patch move calling destroy_previous_session() after ksmbd_krb5_authenticate(). Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-27391 Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 1a308171b599..6645d8fd772e 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmbd_work *work, out_len = work->response_sz - (le16_to_cpu(rsp->SecurityBufferOffset) + 4); - /* Check previous session */ - prev_sess_id = le64_to_cpu(req->PreviousSessionId); - if (prev_sess_id && prev_sess_id != sess->id) - destroy_previous_session(conn, sess->user, prev_sess_id); - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { ksmbd_debug(SMB, "krb5 authentication failed\n"); return -EINVAL; } + + /* Check previous session */ + prev_sess_id = le64_to_cpu(req->PreviousSessionId); + if (prev_sess_id && prev_sess_id != sess->id) + destroy_previous_session(conn, sess->user, prev_sess_id); + rsp->SecurityBufferLength = cpu_to_le16(out_len); if ((conn->sign || server_conf.enforced_signing) || From 4ea0bb8aaedfad8e695429cda6bd1c8b0dad0844 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 13 Jun 2025 11:51:29 +0900 Subject: [PATCH 364/497] ksmbd: handle set/get info file for streamed file The bug only appears when: - windows 11 copies a file that has an alternate data stream - streams_xattr is enabled on the share configuration. Microsoft Edge adds a ZoneIdentifier data stream containing the URL for files it downloads. Another way to create a test file: - open cmd.exe - echo "hello from default data stream" > hello.txt - echo "hello again from ads" > hello.txt:ads.txt If you open the file using notepad, we'll see the first message. If you run "notepad hello.txt:ads.txt" in cmd.exe, we should see the second message. dir /s /r should least all streams for the file. The truncation happens because the windows 11 client sends a SetInfo/EndOfFile message on the ADS, but it is instead applied on the main file, because we don't check fp->stream. When receiving set/get info file for a stream file, Change to process requests using stream position and size. Truncate is unnecessary for stream files, so we skip set_file_allocation_info and set_end_of_file_info operations. Reported-by: Marios Makassikis Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 63 +++++++++++++++++++++++++++++++-------- fs/smb/server/vfs.c | 5 ++-- fs/smb/server/vfs_cache.h | 1 + 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 6645d8fd772e..fafa86273f12 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -4872,8 +4872,13 @@ static int get_file_standard_info(struct smb2_query_info_rsp *rsp, sinfo = (struct smb2_file_standard_info *)rsp->Buffer; delete_pending = ksmbd_inode_pending_delete(fp); - sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); - sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); + sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + sinfo->AllocationSize = cpu_to_le64(fp->stream.size); + sinfo->EndOfFile = cpu_to_le64(fp->stream.size); + } sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); sinfo->DeletePending = delete_pending; sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0; @@ -4936,9 +4941,14 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; file_info->Pad1 = 0; - file_info->AllocationSize = - cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = + cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); file_info->DeletePending = delete_pending; @@ -4947,7 +4957,10 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->IndexNumber = cpu_to_le64(stat.ino); file_info->EASize = 0; file_info->AccessFlags = fp->daccess; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); file_info->Mode = fp->coption; file_info->AlignmentRequirement = 0; conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, @@ -5135,8 +5148,13 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, time = ksmbd_UnixTimeToNT(stat.ctime); file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->Reserved = cpu_to_le32(0); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_ntwrk_info)); @@ -5159,7 +5177,11 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp, struct smb2_file_pos_info *file_info; file_info = (struct smb2_file_pos_info *)rsp->Buffer; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); + rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_pos_info)); } @@ -5248,8 +5270,13 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp, file_info->ChangeTime = cpu_to_le64(time); file_info->DosAttributes = fp->f_ci->m_fattr; file_info->Inode = cpu_to_le64(stat.ino); - file_info->EndOfFile = cpu_to_le64(stat.size); - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + if (ksmbd_stream_fd(fp) == false) { + file_info->EndOfFile = cpu_to_le64(stat.size); + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + } else { + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + } file_info->HardLinks = cpu_to_le32(stat.nlink); file_info->Mode = cpu_to_le32(stat.mode & 0777); switch (stat.mode & S_IFMT) { @@ -6191,6 +6218,9 @@ static int set_file_allocation_info(struct ksmbd_work *work, if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + if (ksmbd_stream_fd(fp) == true) + return 0; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (rc) @@ -6249,7 +6279,8 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp, * truncate of some filesystem like FAT32 fill zero data in * truncated range. */ - if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) { + if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC && + ksmbd_stream_fd(fp) == false) { ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize); rc = ksmbd_vfs_truncate(work, fp, newsize); if (rc) { @@ -6322,7 +6353,13 @@ static int set_file_position_info(struct ksmbd_file *fp, return -EINVAL; } - fp->filp->f_pos = current_byte_offset; + if (ksmbd_stream_fd(fp) == false) + fp->filp->f_pos = current_byte_offset; + else { + if (current_byte_offset > XATTR_SIZE_MAX) + current_byte_offset = XATTR_SIZE_MAX; + fp->stream.pos = current_byte_offset; + } return 0; } diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index ba45e809555a..0f3aad12e495 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -293,6 +293,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos, if (v_len - *pos < count) count = v_len - *pos; + fp->stream.pos = v_len; memcpy(buf, &stream_buf[*pos], count); @@ -456,8 +457,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, true); if (err < 0) goto out; - - fp->filp->f_pos = *pos; + else + fp->stream.pos = size; err = 0; out: kvfree(stream_buf); diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 5bbb179736c2..0708155b5caf 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -44,6 +44,7 @@ struct ksmbd_lock { struct stream { char *name; ssize_t size; + loff_t pos; }; struct ksmbd_inode { From 19f4422d485b2d0a935117a1a16015328f99be25 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 13 Jun 2025 17:41:04 -0700 Subject: [PATCH 365/497] perf test: Directory file descriptor leak Add missed close when iterating over the script directories. Fixes: f3295f5b067d3c26 ("perf tests: Use scandirat for shell script finding") Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiapeng Chong Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tiezhu Yang Link: https://lore.kernel.org/r/20250614004108.1650988-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/tests-scripts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index 1d5759d08141..3a2a8438f9af 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd, continue; /* Skip scripts that have a separate driver. */ fd = openat(dir_fd, ent->d_name, O_PATH); append_scripts_in_dir(fd, result, result_sz); + close(fd); } for (i = 0; i < n_dirs; i++) /* Clean up */ zfree(&entlist[i]); From 1c85c94b3767895d70b7a5a49b111f974f5660ec Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Jun 2025 14:33:17 -0300 Subject: [PATCH 366/497] perf bench futex: Fix prctl include in musl libc Namhyung Kim reported: I've updated the perf-tools-next to v6.16-rc1 and found a build error like below on alpine linux 3.18. In file included from bench/futex.c:6: /usr/include/sys/prctl.h:88:8: error: redefinition of 'struct prctl_mm_map' 88 | struct prctl_mm_map { | ^~~~~~~~~~~~ In file included from bench/futex.c:5: /linux/tools/include/uapi/linux/prctl.h:134:8: note: originally defined here 134 | struct prctl_mm_map { | ^~~~~~~~~~~~ make[4]: *** [/linux/tools/build/Makefile.build:86: /build/bench/futex.o] Error 1 git bisect says it's the first commit introduced the failure. So both /usr/include/sys/prctl.h and /linux/tools/include/uapi/linux/prctl.h provide struct prctl_mm_map but their include guard must be different. /usr/include/sys/prctl.h provided by glibc contains the prctl() declaration. It includes also linux/prctl.h. The /usr/include/sys/prctl.h on alpine linux is different. This is probably coming from musl. It contains the PR_* definition and the prctl() declaration. So it clashes here because now the one struct is available twice. The man page for prctl(2) says: | #include /* Definition of PR_* constants */ | #include so musl doesn't follow this. So don't include linux/prctl.h explicitely and add some new defines needed if they aren't available. Acked-by: Sebastian Andrzej Siewior Reported-by: Namhyung Kim Closes: https://lore.kernel.org/r/20250611092542.F4ooE2FL@linutronix.de Link: https://www.openwall.com/lists/musl/2025/06/12/11 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 1 - tools/perf/bench/futex.c | 9 ++++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fdf133c9520f..d2d6d7f3ea33 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c index 26382e4d8d4c..4c4fee107e59 100644 --- a/tools/perf/bench/futex.c +++ b/tools/perf/bench/futex.c @@ -2,11 +2,18 @@ #include #include #include -#include #include #include "futex.h" +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 +#endif // PR_FUTEX_HASH + void futex_set_nbuckets_param(struct bench_futex_parameters *params) { unsigned long flags; From d222b6e6fb31e320eca506e665694d8ddf459157 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Jun 2025 14:18:55 -0300 Subject: [PATCH 367/497] tools headers x86 cpufeatures: Sync with the kernel sources To pick the changes from: faad6645e1128ec2 ("x86/cpufeatures: Add CPUID feature bit for the Bus Lock Threshold") 159013a7ca18c271 ("x86/its: Enumerate Indirect Target Selection (ITS) bug") f9f27c4a377a8b45 ("x86/cpufeatures: Add "Allowed SEV Features" Feature") b02dc185ee86836c ("x86/cpufeatures: Add X86_FEATURE_APX") d88bb2ded2efdc38 ("KVM: x86: Advertise support for AMD's PREFETCHI") This causes these perf files to be rebuilt and brings some X86_FEATURE that may be used by: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Please see tools/include/uapi/README for further details. Cc: Adrian Hunter Cc: Babu Moger Cc: Chang S. Bae Cc: Dave Hansen Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Kishon Vijay Abraham I Cc: Manali Shukla Cc: Namhyung Kim Cc: Pawan Gupta Cc: Sean Christopherson Link: https://lore.kernel.org/r/aFBWAI3kHYX5aL9G@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e02be2962a01..ee176236c2be 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -336,7 +336,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ +#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ #define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ @@ -379,6 +379,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */ #define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -447,6 +448,7 @@ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ #define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ #define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ +#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ #define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ @@ -458,6 +460,7 @@ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ +#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ @@ -482,7 +485,8 @@ #define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ -#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+ 9) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) @@ -535,6 +539,8 @@ #define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ -#define X86_BUG_ITS X86_BUG( 1*32+ 6) /* "its" CPU is affected by Indirect Target Selection */ -#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ +#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ +#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ + #endif /* _ASM_X86_CPUFEATURES_H */ From 60524f1d2bdf222db6dc3f680e0272441f697fe4 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Mon, 16 Jun 2025 12:03:19 +0530 Subject: [PATCH 368/497] net: ti: icssg-prueth: Fix packet handling for XDP_TX While transmitting XDP frames for XDP_TX, page_pool is used to get the DMA buffers (already mapped to the pages) and need to be freed/reycled once the transmission is complete. This need not be explicitly done by the driver as this is handled more gracefully by the xdp driver while returning the xdp frame. __xdp_return() frees the XDP memory based on its memory type, under which page_pool memory is also handled. This change fixes the transmit queue timeout while running XDP_TX. logs: [ 309.069682] icssg-prueth icssg1-eth eth2: NETDEV WATCHDOG: CPU: 0: transmit queue 0 timed out 45860 ms [ 313.933780] icssg-prueth icssg1-eth eth2: NETDEV WATCHDOG: CPU: 0: transmit queue 0 timed out 50724 ms [ 319.053656] icssg-prueth icssg1-eth eth2: NETDEV WATCHDOG: CPU: 0: transmit queue 0 timed out 55844 ms ... Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250616063319.3347541-1-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 5b8fdb882172..12f25cec6255 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -98,20 +98,11 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, { struct cppi5_host_desc_t *first_desc, *next_desc; dma_addr_t buf_dma, next_desc_dma; - struct prueth_swdata *swdata; - struct page *page; u32 buf_dma_len; first_desc = desc; next_desc = first_desc; - swdata = cppi5_hdesc_get_swdata(desc); - if (swdata->type == PRUETH_SWDATA_PAGE) { - page = swdata->data.page; - page_pool_recycle_direct(page->pp, swdata->data.page); - goto free_desc; - } - cppi5_hdesc_get_obuf(first_desc, &buf_dma, &buf_dma_len); k3_udma_glue_tx_cppi5_to_dma_addr(tx_chn->tx_chn, &buf_dma); @@ -135,7 +126,6 @@ void prueth_xmit_free(struct prueth_tx_chn *tx_chn, k3_cppi_desc_pool_free(tx_chn->desc_pool, next_desc); } -free_desc: k3_cppi_desc_pool_free(tx_chn->desc_pool, first_desc); } EXPORT_SYMBOL_GPL(prueth_xmit_free); @@ -612,13 +602,8 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, k3_udma_glue_tx_dma_to_cppi5_addr(tx_chn->tx_chn, &buf_dma); cppi5_hdesc_attach_buf(first_desc, buf_dma, xdpf->len, buf_dma, xdpf->len); swdata = cppi5_hdesc_get_swdata(first_desc); - if (page) { - swdata->type = PRUETH_SWDATA_PAGE; - swdata->data.page = page; - } else { - swdata->type = PRUETH_SWDATA_XDPF; - swdata->data.xdpf = xdpf; - } + swdata->type = PRUETH_SWDATA_XDPF; + swdata->data.xdpf = xdpf; /* Report BQL before sending the packet */ netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); From 94a17f2dc90bc7eae36c0f478515d4bd1c23e877 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 10 Jun 2025 15:24:20 -0700 Subject: [PATCH 369/497] x86/mm: Disable INVLPGB when PTI is enabled PTI uses separate ASIDs (aka. PCIDs) for kernel and user address spaces. When the kernel needs to flush the user address space, it just sets a bit in a bitmap and then flushes the entire PCID on the next switch to userspace. This bitmap is a single 'unsigned long' which is plenty for all 6 dynamic ASIDs. But, unfortunately, the INVLPGB support brings along a bunch more user ASIDs, as many as ~2k more. The bitmap can't address that many. Fortunately, the bitmap is only needed for PTI and all the CPUs with INVLPGB are AMD CPUs that aren't vulnerable to Meltdown and don't need PTI. The only way someone can run into an issue in practice is by booting with pti=on on a newer AMD CPU. Disable INVLPGB if PTI is enabled. Avoid overrunning the small bitmap. Note: this will be fixed up properly by making the bitmap bigger. For now, just avoid the mostly theoretical bug. Fixes: 4afeb0ed1753 ("x86/mm: Enable broadcast TLB invalidation for multi-threaded processes") Signed-off-by: Dave Hansen Acked-by: Rik van Riel Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20250610222420.E8CBF472%40davehans-spike.ostc.intel.com --- arch/x86/mm/pti.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 190299834011..c0c40b67524e 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -98,6 +98,11 @@ void __init pti_check_boottime_disable(void) return; setup_force_cpu_cap(X86_FEATURE_PTI); + + if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) { + pr_debug("PTI enabled, disabling INVLPGB\n"); + setup_clear_cpu_cap(X86_FEATURE_INVLPGB); + } } static int __init pti_parse_cmdline(char *arg) From 6f793a1d053775f8324b8dba1e7ed224f8b0166f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Sun, 15 Jun 2025 20:07:33 +0000 Subject: [PATCH 370/497] net: netmem: fix skb_ensure_writable with unreadable skbs skb_ensure_writable should succeed when it's trying to write to the header of the unreadable skbs, so it doesn't need an unconditional skb_frags_readable check. The preceding pskb_may_pull() call will succeed if write_len is within the head and fail if we're trying to write to the unreadable payload, so we don't need an additional check. Removing this check restores DSCP functionality with unreadable skbs as it's called from dscp_tg. Cc: willemb@google.com Cc: asml.silence@gmail.com Fixes: 65249feb6b3d ("net: add support for skbs with unreadable frags") Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250615200733.520113-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 85fc82f72d26..d6420b74ea9c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -6261,9 +6261,6 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) if (!pskb_may_pull(skb, write_len)) return -ENOMEM; - if (!skb_frags_readable(skb)) - return -EFAULT; - if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; From 1e9ac33fa271be0d2480fd732f9642d81542500b Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Fri, 13 Jun 2025 16:18:39 -0700 Subject: [PATCH 371/497] bnxt_en: Fix double invocation of bnxt_ulp_stop()/bnxt_ulp_start() Before the commit under the Fixes tag below, bnxt_ulp_stop() and bnxt_ulp_start() were always invoked in pairs. After that commit, the new bnxt_ulp_restart() can be invoked after bnxt_ulp_stop() has been called. This may result in the RoCE driver's aux driver .suspend() method being invoked twice. The 2nd bnxt_re_suspend() call will crash when it dereferences a NULL pointer: (NULL ib_device): Handle device suspend call BUG: kernel NULL pointer dereference, address: 0000000000000b78 PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP PTI CPU: 20 UID: 0 PID: 181 Comm: kworker/u96:5 Tainted: G S 6.15.0-rc1 #4 PREEMPT(voluntary) Tainted: [S]=CPU_OUT_OF_SPEC Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.4.3 01/17/2017 Workqueue: bnxt_pf_wq bnxt_sp_task [bnxt_en] RIP: 0010:bnxt_re_suspend+0x45/0x1f0 [bnxt_re] Code: 8b 05 a7 3c 5b f5 48 89 44 24 18 31 c0 49 8b 5c 24 08 4d 8b 2c 24 e8 ea 06 0a f4 48 c7 c6 04 60 52 c0 48 89 df e8 1b ce f9 ff <48> 8b 83 78 0b 00 00 48 8b 80 38 03 00 00 a8 40 0f 85 b5 00 00 00 RSP: 0018:ffffa2e84084fd88 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffffffb4b6b934 RDI: 00000000ffffffff RBP: ffffa1760954c9c0 R08: 0000000000000000 R09: c0000000ffffdfff R10: 0000000000000001 R11: ffffa2e84084fb50 R12: ffffa176031ef070 R13: ffffa17609775000 R14: ffffa17603adc180 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffffa17daa397000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000b78 CR3: 00000004aaa30003 CR4: 00000000003706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bnxt_ulp_stop+0x69/0x90 [bnxt_en] bnxt_sp_task+0x678/0x920 [bnxt_en] ? __schedule+0x514/0xf50 process_scheduled_works+0x9d/0x400 worker_thread+0x11c/0x260 ? __pfx_worker_thread+0x10/0x10 kthread+0xfe/0x1e0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2b/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Check the BNXT_EN_FLAG_ULP_STOPPED flag and do not proceed if the flag is already set. This will preserve the original symmetrical bnxt_ulp_stop() and bnxt_ulp_start(). Also, inside bnxt_ulp_start(), clear the BNXT_EN_FLAG_ULP_STOPPED flag after taking the mutex to avoid any race condition. And for symmetry, only proceed in bnxt_ulp_start() if the BNXT_EN_FLAG_ULP_STOPPED is set. Fixes: 3c163f35bd50 ("bnxt_en: Optimize recovery path ULP locking in the driver") Signed-off-by: Kalesh AP Co-developed-by: Michael Chan Signed-off-by: Michael Chan Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250613231841.377988-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 84c4812414fd..2450a369b792 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -231,10 +231,9 @@ void bnxt_ulp_stop(struct bnxt *bp) return; mutex_lock(&edev->en_dev_lock); - if (!bnxt_ulp_registered(edev)) { - mutex_unlock(&edev->en_dev_lock); - return; - } + if (!bnxt_ulp_registered(edev) || + (edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) + goto ulp_stop_exit; edev->flags |= BNXT_EN_FLAG_ULP_STOPPED; if (aux_priv) { @@ -250,6 +249,7 @@ void bnxt_ulp_stop(struct bnxt *bp) adrv->suspend(adev, pm); } } +ulp_stop_exit: mutex_unlock(&edev->en_dev_lock); } @@ -258,19 +258,13 @@ void bnxt_ulp_start(struct bnxt *bp, int err) struct bnxt_aux_priv *aux_priv = bp->aux_priv; struct bnxt_en_dev *edev = bp->edev; - if (!edev) - return; - - edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED; - - if (err) + if (!edev || err) return; mutex_lock(&edev->en_dev_lock); - if (!bnxt_ulp_registered(edev)) { - mutex_unlock(&edev->en_dev_lock); - return; - } + if (!bnxt_ulp_registered(edev) || + !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) + goto ulp_start_exit; if (edev->ulp_tbl->msix_requested) bnxt_fill_msix_vecs(bp, edev->msix_entries); @@ -287,6 +281,8 @@ void bnxt_ulp_start(struct bnxt *bp, int err) adrv->resume(adev); } } +ulp_start_exit: + edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED; mutex_unlock(&edev->en_dev_lock); } From e11baaea94e2923739a98abeee85eb0667c04fd3 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Fri, 13 Jun 2025 16:18:40 -0700 Subject: [PATCH 372/497] bnxt_en: Add a helper function to configure MRU and RSS Add a new helper function that will configure MRU and RSS table of a VNIC. This will be useful when we configure both on a VNIC when resetting an RX ring. This function will be used again in the next bug fix patch where we have to reconfigure VNICs for RSS contexts. Suggested-by: Michael Chan Reviewed-by: Simon Horman Reviewed-by: David Wei Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250613231841.377988-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 37 ++++++++++++++++------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 869580b6f70d..dfd2366d4c8c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10780,6 +10780,26 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bp->num_rss_ctx--; } +static int bnxt_set_vnic_mru_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u16 mru) +{ + int rc; + + if (mru) { + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } + } + vnic->mru = mru; + bnxt_hwrm_vnic_update(bp, vnic, + VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + + return 0; +} + static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) { bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA); @@ -15927,6 +15947,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) struct bnxt_vnic_info *vnic; struct bnxt_napi *bnapi; int i, rc; + u16 mru; rxr = &bp->rx_ring[idx]; clone = qmem; @@ -15977,18 +15998,13 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); + mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", - vnic->vnic_id, rc); + rc = bnxt_set_vnic_mru_p5(bp, vnic, mru); + if (rc) return rc; - } - vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; - bnxt_hwrm_vnic_update(bp, vnic, - VNIC_UPDATE_REQ_ENABLES_MRU_VALID); } return 0; @@ -16013,9 +16029,8 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - vnic->mru = 0; - bnxt_hwrm_vnic_update(bp, vnic, - VNIC_UPDATE_REQ_ENABLES_MRU_VALID); + + bnxt_set_vnic_mru_p5(bp, vnic, 0); } /* Make sure NAPI sees that the VNIC is disabled */ synchronize_net(); From 5dacc94c6fe61cde6f700e95cf35af9944b022c4 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Fri, 13 Jun 2025 16:18:41 -0700 Subject: [PATCH 373/497] bnxt_en: Update MRU and RSS table of RSS contexts on queue reset The commit under the Fixes tag below which updates the VNICs' RSS and MRU during .ndo_queue_start(), needs to be extended to cover any non-default RSS contexts which have their own VNICs. Without this step, packets that are destined to a non-default RSS context may be dropped after .ndo_queue_start(). We further optimize this scheme by updating the VNIC only if the RX ring being restarted is in the RSS table of the VNIC. Updating the VNIC (in particular setting the MRU to 0) will momentarily stop all traffic to all rings in the RSS table. Any VNIC that has the RX ring excluded from the RSS table can skip this step and avoid the traffic disruption. Note that this scheme is just an improvement. A VNIC with multiple rings in the RSS table will still see traffic disruptions to all rings in the RSS table when one of the rings is being restarted. We are working on a FW scheme that will improve upon this further. Fixes: 5ac066b7b062 ("bnxt_en: Fix queue start to update vnic RSS table") Reported-by: David Wei Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250613231841.377988-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 56 +++++++++++++++++++++-- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dfd2366d4c8c..2cb3185c442c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10780,11 +10780,39 @@ void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bp->num_rss_ctx--; } +static bool bnxt_vnic_has_rx_ring(struct bnxt *bp, struct bnxt_vnic_info *vnic, + int rxr_id) +{ + u16 tbl_size = bnxt_get_rxfh_indir_size(bp->dev); + int i, vnic_rx; + + /* Ntuple VNIC always has all the rx rings. Any change of ring id + * must be updated because a future filter may use it. + */ + if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG) + return true; + + for (i = 0; i < tbl_size; i++) { + if (vnic->flags & BNXT_VNIC_RSSCTX_FLAG) + vnic_rx = ethtool_rxfh_context_indir(vnic->rss_ctx)[i]; + else + vnic_rx = bp->rss_indir_tbl[i]; + + if (rxr_id == vnic_rx) + return true; + } + + return false; +} + static int bnxt_set_vnic_mru_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic, - u16 mru) + u16 mru, int rxr_id) { int rc; + if (!bnxt_vnic_has_rx_ring(bp, vnic, rxr_id)) + return 0; + if (mru) { rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); if (rc) { @@ -10800,6 +10828,24 @@ static int bnxt_set_vnic_mru_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic, return 0; } +static int bnxt_set_rss_ctx_vnic_mru(struct bnxt *bp, u16 mru, int rxr_id) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + int rc; + + xa_for_each(&bp->dev->ethtool->rss_ctx, context, ctx) { + struct bnxt_rss_ctx *rss_ctx = ethtool_rxfh_context_priv(ctx); + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, rxr_id); + if (rc) + return rc; + } + + return 0; +} + static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) { bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA); @@ -16002,12 +16048,11 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - rc = bnxt_set_vnic_mru_p5(bp, vnic, mru); + rc = bnxt_set_vnic_mru_p5(bp, vnic, mru, idx); if (rc) return rc; } - - return 0; + return bnxt_set_rss_ctx_vnic_mru(bp, mru, idx); err_reset: netdev_err(bp->dev, "Unexpected HWRM error during queue start rc: %d\n", @@ -16030,8 +16075,9 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx) for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; - bnxt_set_vnic_mru_p5(bp, vnic, 0); + bnxt_set_vnic_mru_p5(bp, vnic, 0, idx); } + bnxt_set_rss_ctx_vnic_mru(bp, 0, idx); /* Make sure NAPI sees that the VNIC is disabled */ synchronize_net(); rxr = &bp->rx_ring[idx]; From 3c902383f2da91cba3821b73aa6edd49f4db6023 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Mon, 16 Jun 2025 12:04:32 +0200 Subject: [PATCH 374/497] x86/its: Fix an ifdef typo in its_alloc() Commit a82b26451de1 ("x86/its: explicitly manage permissions for ITS pages") reworks its_alloc() and introduces a typo in an ifdef conditional, referring to CONFIG_MODULE instead of CONFIG_MODULES. Fix this typo in its_alloc(). Fixes: a82b26451de1 ("x86/its: explicitly manage permissions for ITS pages") Signed-off-by: Lukas Bulwahn Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20250616100432.22941-1-lukas.bulwahn%40redhat.com --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6455f7f751b3..9ae80fa904a2 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -228,7 +228,7 @@ static void *its_alloc(void) struct its_array *pages = &its_pages; void *page; -#ifdef CONFIG_MODULE +#ifdef CONFIG_MODULES if (its_mod) pages = &its_mod->arch.its_pages; #endif From 5ab73b010cad294851e558f1d4714a85c6f206c7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 13 Jun 2025 20:47:48 +0300 Subject: [PATCH 375/497] ptp: fix breakage after ptp_vclock_in_use() rework What is broken -------------- ptp4l, and any other application which calls clock_adjtime() on a physical clock, is greeted with error -EBUSY after commit 87f7ce260a3c ("ptp: remove ptp->n_vclocks check logic in ptp_vclock_in_use()"). Explanation for the breakage ---------------------------- The blamed commit was based on the false assumption that ptp_vclock_in_use() callers already test for n_vclocks prior to calling this function. This is notably incorrect for the code path below, in which there is, in fact, no n_vclocks test: ptp_clock_adjtime() -> ptp_clock_freerun() -> ptp_vclock_in_use() The result is that any clock adjustment on any physical clock is now impossible. This is _despite_ there not being any vclock over this physical clock. $ ptp4l -i eno0 -2 -P -m ptp4l[58.425]: selected /dev/ptp0 as PTP clock [ 58.429749] ptp: physical clock is free running ptp4l[58.431]: Failed to open /dev/ptp0: Device or resource busy failed to create a clock $ cat /sys/class/ptp/ptp0/n_vclocks 0 The patch makes the ptp_vclock_in_use() function say "if it's not a virtual clock, then this physical clock does have virtual clocks on top". Then ptp_clock_freerun() uses this information to say "this physical clock has virtual clocks on top, so it must stay free-running". Then ptp_clock_adjtime() uses this information to say "well, if this physical clock has to be free-running, I can't do it, return -EBUSY". Simply put, ptp_vclock_in_use() cannot be simplified so as to remove the test whether vclocks are in use. What did the blamed commit intend to fix ---------------------------------------- The blamed commit presents a lockdep warning stating "possible recursive locking detected", with the n_vclocks_store() and ptp_clock_unregister() functions involved. The recursive locking seems this: n_vclocks_store() -> mutex_lock_interruptible(&ptp->n_vclocks_mux) // 1 -> device_for_each_child_reverse(..., unregister_vclock) -> unregister_vclock() -> ptp_vclock_unregister() -> ptp_clock_unregister() -> ptp_vclock_in_use() -> mutex_lock_interruptible(&ptp->n_vclocks_mux) // 2 The issue can be triggered by creating and then deleting vclocks: $ echo 2 > /sys/class/ptp/ptp0/n_vclocks $ echo 0 > /sys/class/ptp/ptp0/n_vclocks But note that in the original stack trace, the address of the first lock is different from the address of the second lock. This is because at step 1 marked above, &ptp->n_vclocks_mux is the lock of the parent (physical) PTP clock, and at step 2, the lock is of the child (virtual) PTP clock. They are different locks of different devices. In this situation there is no real deadlock, the lockdep warning is caused by the fact that the mutexes have the same lock class on both the parent and the child. Functionally it is fine. Proposed alternative solution ----------------------------- We must reintroduce the body of ptp_vclock_in_use() mostly as it was structured prior to the blamed commit, but avoid the lockdep warning. Based on the fact that vclocks cannot be nested on top of one another (ptp_is_attribute_visible() hides n_vclocks for virtual clocks), we already know that ptp->n_vclocks is zero for a virtual clock. And ptp->is_virtual_clock is a runtime invariant, established at ptp_clock_register() time and never changed. There is no need to serialize on any mutex in order to read ptp->is_virtual_clock, and we take advantage of that by moving it outside the lock. Thus, virtual clocks do not need to acquire &ptp->n_vclocks_mux at all, and step 2 in the code walkthrough above can simply go away. We can simply return false to the question "ptp_vclock_in_use(a virtual clock)". Other notes ----------- Releasing &ptp->n_vclocks_mux before ptp_vclock_in_use() returns execution seems racy, because the returned value can become stale as soon as the function returns and before the return value is used (i.e. n_vclocks_store() can run any time). The locking requirement should somehow be transferred to the caller, to ensure a longer life time for the returned value, but this seems out of scope for this severe bug fix. Because we are also fixing up the logic from the original commit, there is another Fixes: tag for that. Fixes: 87f7ce260a3c ("ptp: remove ptp->n_vclocks check logic in ptp_vclock_in_use()") Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250613174749.406826-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_private.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 528d86a33f37..a6aad743c282 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -98,7 +98,27 @@ static inline int queue_cnt(const struct timestamp_event_queue *q) /* Check if ptp virtual clock is in use */ static inline bool ptp_vclock_in_use(struct ptp_clock *ptp) { - return !ptp->is_virtual_clock; + bool in_use = false; + + /* Virtual clocks can't be stacked on top of virtual clocks. + * Avoid acquiring the n_vclocks_mux on virtual clocks, to allow this + * function to be called from code paths where the n_vclocks_mux of the + * parent physical clock is already held. Functionally that's not an + * issue, but lockdep would complain, because they have the same lock + * class. + */ + if (ptp->is_virtual_clock) + return false; + + if (mutex_lock_interruptible(&ptp->n_vclocks_mux)) + return true; + + if (ptp->n_vclocks) + in_use = true; + + mutex_unlock(&ptp->n_vclocks_mux); + + return in_use; } /* Check if ptp clock shall be free running */ From aa112cbc5f0ac6f3b44d829005bf34005d9fe9bb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 13 Jun 2025 20:47:49 +0300 Subject: [PATCH 376/497] ptp: allow reading of currently dialed frequency to succeed on free-running clocks There is a bug in ptp_clock_adjtime() which makes it refuse the operation even if we just want to read the current clock dialed frequency, not modify anything (tx->modes == 0). That should be possible even if the clock is free-running. For context, the kernel UAPI is the same for getting and setting the frequency of a POSIX clock. For example, ptp4l errors out at clock_create() -> clockadj_get_freq() -> clock_adjtime() time, when it should logically only have failed on actual adjustments to the clock, aka if the clock was configured as slave. But in master mode it should work. This was discovered when examining the issue described in the previous commit, where ptp_clock_freerun() returned true despite n_vclocks being zero. Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250613174749.406826-3-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_clock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 35a5994bf64f..36f57d7b4a66 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -121,7 +121,8 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx) struct ptp_clock_info *ops; int err = -EOPNOTSUPP; - if (ptp_clock_freerun(ptp)) { + if (tx->modes & (ADJ_SETOFFSET | ADJ_FREQUENCY | ADJ_OFFSET) && + ptp_clock_freerun(ptp)) { pr_err("ptp: physical clock is free running\n"); return -EBUSY; } From b160766e26d4e2e2d6fe2294e0b02f92baefcec5 Mon Sep 17 00:00:00 2001 From: Hyunwoo Kim Date: Fri, 13 Jun 2025 20:54:57 -0400 Subject: [PATCH 377/497] net/sched: fix use-after-free in taprio_dev_notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since taprio’s taprio_dev_notifier() isn’t protected by an RCU read-side critical section, a race with advance_sched() can lead to a use-after-free. Adding rcu_read_lock() inside taprio_dev_notifier() prevents this. Fixes: fed87cc6718a ("net/sched: taprio: automatically calculate queueMaxSDU based on TC gate durations") Cc: stable@vger.kernel.org Signed-off-by: Hyunwoo Kim Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/aEzIYYxt0is9upYG@v4bel-B760M-AORUS-ELITE-AX Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 14021b812329..2b14c81a87e5 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1328,13 +1328,15 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, stab = rtnl_dereference(q->root->stab); - oper = rtnl_dereference(q->oper_sched); + rcu_read_lock(); + oper = rcu_dereference(q->oper_sched); if (oper) taprio_update_queue_max_sdu(q, oper, stab); - admin = rtnl_dereference(q->admin_sched); + admin = rcu_dereference(q->admin_sched); if (admin) taprio_update_queue_max_sdu(q, admin, stab); + rcu_read_unlock(); break; } From 18ae7d0cdd76420e80f6ab15ada063708f14ba40 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 9 Jun 2025 22:06:22 -0500 Subject: [PATCH 378/497] wifi: ath12k: Avoid CPU busy-wait by handling VDEV_STAT and BCN_STAT When the ath12k driver is built without CONFIG_ATH12K_DEBUG, the recently refactored stats code can cause any user space application (such at NetworkManager) to consume 100% CPU for 3 seconds, every time stats are read. Commit 'b8a0d83fe4c7 ("wifi: ath12k: move firmware stats out of debugfs")' moved ath12k_debugfs_fw_stats_request() out of debugfs, by merging the additional logic into ath12k_mac_get_fw_stats(). Among the added responsibility of ath12k_mac_get_fw_stats() was the busy-wait for `fw_stats_done`. Signalling of `fw_stats_done` happens when one of the WMI_REQUEST_PDEV_STAT, WMI_REQUEST_VDEV_STAT, and WMI_REQUEST_BCN_STAT messages are received, but the handling of the latter two commands remained in the debugfs code. As `fw_stats_done` isn't signalled, the calling processes will spin until the timeout (3 seconds) is reached. Moving the handling of these two additional responses out of debugfs resolves the issue. Fixes: b8a0d83fe4c7 ("wifi: ath12k: move firmware stats out of debugfs") Signed-off-by: Bjorn Andersson Tested-by: Abel Vesa Link: https://patch.msgid.link/20250609-ath12k-fw-stats-done-v1-1-2b3624656697@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/debugfs.c | 58 -------------------- drivers/net/wireless/ath/ath12k/debugfs.h | 7 --- drivers/net/wireless/ath/ath12k/wmi.c | 67 ++++++++++++++++++++--- 3 files changed, 60 insertions(+), 72 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/debugfs.c b/drivers/net/wireless/ath/ath12k/debugfs.c index dd624d73b8b2..23da93afaa5c 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.c +++ b/drivers/net/wireless/ath/ath12k/debugfs.c @@ -1251,64 +1251,6 @@ void ath12k_debugfs_soc_destroy(struct ath12k_base *ab) */ } -void -ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats) -{ - struct ath12k_base *ab = ar->ab; - struct ath12k_pdev *pdev; - bool is_end; - static unsigned int num_vdev, num_bcn; - size_t total_vdevs_started = 0; - int i; - - if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { - if (list_empty(&stats->vdevs)) { - ath12k_warn(ab, "empty vdev stats"); - return; - } - /* FW sends all the active VDEV stats irrespective of PDEV, - * hence limit until the count of all VDEVs started - */ - rcu_read_lock(); - for (i = 0; i < ab->num_radios; i++) { - pdev = rcu_dereference(ab->pdevs_active[i]); - if (pdev && pdev->ar) - total_vdevs_started += pdev->ar->num_started_vdevs; - } - rcu_read_unlock(); - - is_end = ((++num_vdev) == total_vdevs_started); - - list_splice_tail_init(&stats->vdevs, - &ar->fw_stats.vdevs); - - if (is_end) { - ar->fw_stats.fw_stats_done = true; - num_vdev = 0; - } - return; - } - if (stats->stats_id == WMI_REQUEST_BCN_STAT) { - if (list_empty(&stats->bcn)) { - ath12k_warn(ab, "empty beacon stats"); - return; - } - /* Mark end until we reached the count of all started VDEVs - * within the PDEV - */ - is_end = ((++num_bcn) == ar->num_started_vdevs); - - list_splice_tail_init(&stats->bcn, - &ar->fw_stats.bcn); - - if (is_end) { - ar->fw_stats.fw_stats_done = true; - num_bcn = 0; - } - } -} - static int ath12k_open_vdev_stats(struct inode *inode, struct file *file) { struct ath12k *ar = inode->i_private; diff --git a/drivers/net/wireless/ath/ath12k/debugfs.h b/drivers/net/wireless/ath/ath12k/debugfs.h index ebef7dace344..21641a8a0346 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs.h +++ b/drivers/net/wireless/ath/ath12k/debugfs.h @@ -12,8 +12,6 @@ void ath12k_debugfs_soc_create(struct ath12k_base *ab); void ath12k_debugfs_soc_destroy(struct ath12k_base *ab); void ath12k_debugfs_register(struct ath12k *ar); void ath12k_debugfs_unregister(struct ath12k *ar); -void ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats); void ath12k_debugfs_op_vif_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void ath12k_debugfs_pdev_create(struct ath12k_base *ab); @@ -126,11 +124,6 @@ static inline void ath12k_debugfs_unregister(struct ath12k *ar) { } -static inline void ath12k_debugfs_fw_stats_process(struct ath12k *ar, - struct ath12k_fw_stats *stats) -{ -} - static inline bool ath12k_debugfs_is_extd_rx_stats_enabled(struct ath12k *ar) { return false; diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 60e2444fe08c..2d2444417e2b 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -7626,6 +7626,63 @@ static int ath12k_wmi_pull_fw_stats(struct ath12k_base *ab, struct sk_buff *skb, &parse); } +static void ath12k_wmi_fw_stats_process(struct ath12k *ar, + struct ath12k_fw_stats *stats) +{ + struct ath12k_base *ab = ar->ab; + struct ath12k_pdev *pdev; + bool is_end; + static unsigned int num_vdev, num_bcn; + size_t total_vdevs_started = 0; + int i; + + if (stats->stats_id == WMI_REQUEST_VDEV_STAT) { + if (list_empty(&stats->vdevs)) { + ath12k_warn(ab, "empty vdev stats"); + return; + } + /* FW sends all the active VDEV stats irrespective of PDEV, + * hence limit until the count of all VDEVs started + */ + rcu_read_lock(); + for (i = 0; i < ab->num_radios; i++) { + pdev = rcu_dereference(ab->pdevs_active[i]); + if (pdev && pdev->ar) + total_vdevs_started += pdev->ar->num_started_vdevs; + } + rcu_read_unlock(); + + is_end = ((++num_vdev) == total_vdevs_started); + + list_splice_tail_init(&stats->vdevs, + &ar->fw_stats.vdevs); + + if (is_end) { + ar->fw_stats.fw_stats_done = true; + num_vdev = 0; + } + return; + } + if (stats->stats_id == WMI_REQUEST_BCN_STAT) { + if (list_empty(&stats->bcn)) { + ath12k_warn(ab, "empty beacon stats"); + return; + } + /* Mark end until we reached the count of all started VDEVs + * within the PDEV + */ + is_end = ((++num_bcn) == ar->num_started_vdevs); + + list_splice_tail_init(&stats->bcn, + &ar->fw_stats.bcn); + + if (is_end) { + ar->fw_stats.fw_stats_done = true; + num_bcn = 0; + } + } +} + static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *skb) { struct ath12k_fw_stats stats = {}; @@ -7655,19 +7712,15 @@ static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *sk spin_lock_bh(&ar->data_lock); - /* WMI_REQUEST_PDEV_STAT can be requested via .get_txpower mac ops or via - * debugfs fw stats. Therefore, processing it separately. - */ + /* Handle WMI_REQUEST_PDEV_STAT status update */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); ar->fw_stats.fw_stats_done = true; goto complete; } - /* WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT are currently requested only - * via debugfs fw stats. Hence, processing these in debugfs context. - */ - ath12k_debugfs_fw_stats_process(ar, &stats); + /* Handle WMI_REQUEST_VDEV_STAT and WMI_REQUEST_BCN_STAT updates. */ + ath12k_wmi_fw_stats_process(ar, &stats); complete: complete(&ar->fw_stats_complete); From 062ade23991e556a14bbb27c1cf873c34dbd9d28 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 22 May 2025 16:54:10 +0800 Subject: [PATCH 379/497] wifi: ath12k: parse and save hardware mode info from WMI_SERVICE_READY_EXT_EVENTID event for later use WLAN hardware might support various hardware modes such as DBS (dual band simultaneously), SBS (single band simultaneously) and DBS_OR_SBS etc, see enum wmi_host_hw_mode_config_type. Firmware advertises actual supported modes in WMI_SERVICE_READY_EXT_EVENTID event. For each mode, firmware advertises frequency range each hardware MAC can operate on. In MLO case such information is necessary during vdev activation and link selection (which is done in following patches), so add a new structure ath12k_svc_ext_info to ath12k_wmi_base, then parse and save those information to it for later use. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250522-ath12k-sbs-dbs-v1-1-54a29e7a3a88@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 69 ++++++++++++++++++++++++++- drivers/net/wireless/ath/ath12k/wmi.h | 31 ++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 2d2444417e2b..edb38fbb4ed4 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -4395,6 +4395,7 @@ static int ath12k_wmi_hw_mode_caps_parse(struct ath12k_base *soc, static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc, u16 len, const void *ptr, void *data) { + struct ath12k_svc_ext_info *svc_ext_info = &soc->wmi_ab.svc_ext_info; struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext = data; const struct ath12k_wmi_hw_mode_cap_params *hw_mode_caps; enum wmi_host_hw_mode_config_type mode, pref; @@ -4427,8 +4428,11 @@ static int ath12k_wmi_hw_mode_caps(struct ath12k_base *soc, } } - ath12k_dbg(soc, ATH12K_DBG_WMI, "preferred_hw_mode:%d\n", - soc->wmi_ab.preferred_hw_mode); + svc_ext_info->num_hw_modes = svc_rdy_ext->n_hw_mode_caps; + + ath12k_dbg(soc, ATH12K_DBG_WMI, "num hw modes %u preferred_hw_mode %d\n", + svc_ext_info->num_hw_modes, soc->wmi_ab.preferred_hw_mode); + if (soc->wmi_ab.preferred_hw_mode == WMI_HOST_HW_MODE_MAX) return -EINVAL; @@ -4658,6 +4662,65 @@ static int ath12k_wmi_dma_ring_caps(struct ath12k_base *ab, return ret; } +static void +ath12k_wmi_save_mac_phy_info(struct ath12k_base *ab, + const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap, + struct ath12k_svc_ext_mac_phy_info *mac_phy_info) +{ + mac_phy_info->phy_id = __le32_to_cpu(mac_phy_cap->phy_id); + mac_phy_info->supported_bands = __le32_to_cpu(mac_phy_cap->supported_bands); + mac_phy_info->hw_freq_range.low_2ghz_freq = + __le32_to_cpu(mac_phy_cap->low_2ghz_chan_freq); + mac_phy_info->hw_freq_range.high_2ghz_freq = + __le32_to_cpu(mac_phy_cap->high_2ghz_chan_freq); + mac_phy_info->hw_freq_range.low_5ghz_freq = + __le32_to_cpu(mac_phy_cap->low_5ghz_chan_freq); + mac_phy_info->hw_freq_range.high_5ghz_freq = + __le32_to_cpu(mac_phy_cap->high_5ghz_chan_freq); +} + +static void +ath12k_wmi_save_all_mac_phy_info(struct ath12k_base *ab, + struct ath12k_wmi_svc_rdy_ext_parse *svc_rdy_ext) +{ + struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info; + const struct ath12k_wmi_mac_phy_caps_params *mac_phy_cap; + const struct ath12k_wmi_hw_mode_cap_params *hw_mode_cap; + struct ath12k_svc_ext_mac_phy_info *mac_phy_info; + u32 hw_mode_id, phy_bit_map; + u8 hw_idx; + + mac_phy_info = &svc_ext_info->mac_phy_info[0]; + mac_phy_cap = svc_rdy_ext->mac_phy_caps; + + for (hw_idx = 0; hw_idx < svc_ext_info->num_hw_modes; hw_idx++) { + hw_mode_cap = &svc_rdy_ext->hw_mode_caps[hw_idx]; + hw_mode_id = __le32_to_cpu(hw_mode_cap->hw_mode_id); + phy_bit_map = __le32_to_cpu(hw_mode_cap->phy_id_map); + + while (phy_bit_map) { + ath12k_wmi_save_mac_phy_info(ab, mac_phy_cap, mac_phy_info); + mac_phy_info->hw_mode_config_type = + le32_get_bits(hw_mode_cap->hw_mode_config_type, + WMI_HW_MODE_CAP_CFG_TYPE); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "hw_idx %u hw_mode_id %u hw_mode_config_type %u supported_bands %u phy_id %u 2 GHz [%u - %u] 5 GHz [%u - %u]\n", + hw_idx, hw_mode_id, + mac_phy_info->hw_mode_config_type, + mac_phy_info->supported_bands, mac_phy_info->phy_id, + mac_phy_info->hw_freq_range.low_2ghz_freq, + mac_phy_info->hw_freq_range.high_2ghz_freq, + mac_phy_info->hw_freq_range.low_5ghz_freq, + mac_phy_info->hw_freq_range.high_5ghz_freq); + + mac_phy_cap++; + mac_phy_info++; + + phy_bit_map >>= 1; + } + } +} + static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *ptr, void *data) @@ -4706,6 +4769,8 @@ static int ath12k_wmi_svc_rdy_ext_parse(struct ath12k_base *ab, return ret; } + ath12k_wmi_save_all_mac_phy_info(ab, svc_rdy_ext); + svc_rdy_ext->mac_phy_done = true; } else if (!svc_rdy_ext->ext_hal_reg_done) { ret = ath12k_wmi_ext_hal_reg_caps(ab, len, ptr, svc_rdy_ext); diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index ac18f75e0449..96c31b7820ec 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -2617,6 +2617,8 @@ struct ath12k_wmi_soc_mac_phy_hw_mode_caps_params { __le32 num_chainmask_tables; } __packed; +#define WMI_HW_MODE_CAP_CFG_TYPE GENMASK(27, 0) + struct ath12k_wmi_hw_mode_cap_params { __le32 tlv_header; __le32 hw_mode_id; @@ -2666,6 +2668,12 @@ struct ath12k_wmi_mac_phy_caps_params { __le32 he_cap_info_2g_ext; __le32 he_cap_info_5g_ext; __le32 he_cap_info_internal; + __le32 wireless_modes; + __le32 low_2ghz_chan_freq; + __le32 high_2ghz_chan_freq; + __le32 low_5ghz_chan_freq; + __le32 high_5ghz_chan_freq; + __le32 nss_ratio; } __packed; struct ath12k_wmi_hal_reg_caps_ext_params { @@ -5049,6 +5057,27 @@ struct ath12k_wmi_pdev { u32 rx_decap_mode; }; +struct ath12k_hw_mode_freq_range_arg { + u32 low_2ghz_freq; + u32 high_2ghz_freq; + u32 low_5ghz_freq; + u32 high_5ghz_freq; +}; + +struct ath12k_svc_ext_mac_phy_info { + enum wmi_host_hw_mode_config_type hw_mode_config_type; + u32 phy_id; + u32 supported_bands; + struct ath12k_hw_mode_freq_range_arg hw_freq_range; +}; + +#define ATH12K_MAX_MAC_PHY_CAP 8 + +struct ath12k_svc_ext_info { + u32 num_hw_modes; + struct ath12k_svc_ext_mac_phy_info mac_phy_info[ATH12K_MAX_MAC_PHY_CAP]; +}; + struct ath12k_wmi_base { struct ath12k_base *ab; struct ath12k_wmi_pdev wmi[MAX_RADIOS]; @@ -5066,6 +5095,8 @@ struct ath12k_wmi_base { enum wmi_host_hw_mode_config_type preferred_hw_mode; struct ath12k_wmi_target_cap_arg *targ_cap; + + struct ath12k_svc_ext_info svc_ext_info; }; struct wmi_pdev_set_bios_interface_cmd { From 241d130f1419fd99923d99aff6e40490a4c34d93 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 22 May 2025 16:54:11 +0800 Subject: [PATCH 380/497] wifi: ath12k: parse and save sbs_lower_band_end_freq from WMI_SERVICE_READY_EXT2_EVENTID event Firmware sends the boundary between lower and higher bands in ath12k_wmi_dbs_or_sbs_cap_params structure embedded in WMI_SERVICE_READY_EXT2_EVENTID event. The boundary is needed when updating frequency range in the following patch. So parse and save it for later use. Note ath12k_wmi_dbs_or_sbs_cap_params is placed after some other structures, so placeholders for them are added as well. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250522-ath12k-sbs-dbs-v1-2-54a29e7a3a88@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 23 +++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/wmi.h | 6 ++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index edb38fbb4ed4..824c910bf1d6 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -91,6 +91,11 @@ struct ath12k_wmi_svc_rdy_ext2_parse { bool dma_ring_cap_done; bool spectral_bin_scaling_done; bool mac_phy_caps_ext_done; + bool hal_reg_caps_ext2_done; + bool scan_radio_caps_ext2_done; + bool twt_caps_done; + bool htt_msdu_idx_to_qtype_map_done; + bool dbs_or_sbs_cap_ext_done; }; struct ath12k_wmi_rdy_parse { @@ -4991,6 +4996,7 @@ static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *ptr, void *data) { + const struct ath12k_wmi_dbs_or_sbs_cap_params *dbs_or_sbs_caps; struct ath12k_wmi_pdev *wmi_handle = &ab->wmi_ab.wmi[0]; struct ath12k_wmi_svc_rdy_ext2_parse *parse = data; int ret; @@ -5032,6 +5038,23 @@ static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, } parse->mac_phy_caps_ext_done = true; + } else if (!parse->hal_reg_caps_ext2_done) { + parse->hal_reg_caps_ext2_done = true; + } else if (!parse->scan_radio_caps_ext2_done) { + parse->scan_radio_caps_ext2_done = true; + } else if (!parse->twt_caps_done) { + parse->twt_caps_done = true; + } else if (!parse->htt_msdu_idx_to_qtype_map_done) { + parse->htt_msdu_idx_to_qtype_map_done = true; + } else if (!parse->dbs_or_sbs_cap_ext_done) { + dbs_or_sbs_caps = ptr; + ab->wmi_ab.sbs_lower_band_end_freq = + __le32_to_cpu(dbs_or_sbs_caps->sbs_lower_band_end_freq); + + ath12k_dbg(ab, ATH12K_DBG_WMI, "sbs_lower_band_end_freq %u\n", + ab->wmi_ab.sbs_lower_band_end_freq); + + parse->dbs_or_sbs_cap_ext_done = true; } break; default: diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 96c31b7820ec..e69d53054f6d 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -2747,6 +2747,11 @@ struct wmi_service_ready_ext2_event { __le32 default_num_msduq_supported_per_tid; } __packed; +struct ath12k_wmi_dbs_or_sbs_cap_params { + __le32 hw_mode_id; + __le32 sbs_lower_band_end_freq; +} __packed; + struct ath12k_wmi_caps_ext_params { __le32 hw_mode_id; __le32 pdev_and_hw_link_ids; @@ -5097,6 +5102,7 @@ struct ath12k_wmi_base { struct ath12k_wmi_target_cap_arg *targ_cap; struct ath12k_svc_ext_info svc_ext_info; + u32 sbs_lower_band_end_freq; }; struct wmi_pdev_set_bios_interface_cmd { From e47b11e3bd34177af6669ef0945eb23dd4da3bcb Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 22 May 2025 16:54:12 +0800 Subject: [PATCH 381/497] wifi: ath12k: update freq range for each hardware mode Previous patches parse and save hardware MAC frequency range information in ath12k_svc_ext_info structure. Such range represents hardware capability hence needs to be updated based on host information, e.g. guard the range based on host's low/high boundary. So update frequency range. The updated range is saved in ath12k_hw_mode_info structure and would be used when doing vdev activation and link selection in following patches. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250522-ath12k-sbs-dbs-v1-3-54a29e7a3a88@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 446 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/wmi.h | 27 ++ 2 files changed, 473 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 824c910bf1d6..034c18f6a2d2 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -4992,6 +4992,444 @@ static int ath12k_wmi_tlv_mac_phy_caps_ext(struct ath12k_base *ab, u16 tag, return 0; } +static void +ath12k_wmi_update_freq_info(struct ath12k_base *ab, + struct ath12k_svc_ext_mac_phy_info *mac_cap, + enum ath12k_hw_mode mode, + u32 phy_id) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + + mac_range = &hw_mode_info->freq_range_caps[mode][phy_id]; + + if (mac_cap->supported_bands & WMI_HOST_WLAN_2GHZ_CAP) { + mac_range->low_2ghz_freq = max_t(u32, + mac_cap->hw_freq_range.low_2ghz_freq, + ATH12K_MIN_2GHZ_FREQ); + mac_range->high_2ghz_freq = mac_cap->hw_freq_range.high_2ghz_freq ? + min_t(u32, + mac_cap->hw_freq_range.high_2ghz_freq, + ATH12K_MAX_2GHZ_FREQ) : + ATH12K_MAX_2GHZ_FREQ; + } + + if (mac_cap->supported_bands & WMI_HOST_WLAN_5GHZ_CAP) { + mac_range->low_5ghz_freq = max_t(u32, + mac_cap->hw_freq_range.low_5ghz_freq, + ATH12K_MIN_5GHZ_FREQ); + mac_range->high_5ghz_freq = mac_cap->hw_freq_range.high_5ghz_freq ? + min_t(u32, + mac_cap->hw_freq_range.high_5ghz_freq, + ATH12K_MAX_6GHZ_FREQ) : + ATH12K_MAX_6GHZ_FREQ; + } +} + +static bool +ath12k_wmi_all_phy_range_updated(struct ath12k_base *ab, + enum ath12k_hw_mode hwmode) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + mac_range = &hw_mode_info->freq_range_caps[hwmode][phy_id]; + /* modify SBS/DBS range only when both phy for DBS are filled */ + if (!mac_range->low_2ghz_freq && !mac_range->low_5ghz_freq) + return false; + } + + return true; +} + +static void ath12k_wmi_update_dbs_freq_info(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u8 phy_id; + + mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_DBS]; + /* Reset 5 GHz range for shared mac for DBS */ + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if (mac_range[phy_id].low_2ghz_freq && + mac_range[phy_id].low_5ghz_freq) { + mac_range[phy_id].low_5ghz_freq = 0; + mac_range[phy_id].high_5ghz_freq = 0; + } + } +} + +static u32 +ath12k_wmi_get_highest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range) +{ + u32 highest_freq = 0; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if (range[phy_id].high_5ghz_freq > highest_freq) + highest_freq = range[phy_id].high_5ghz_freq; + } + + return highest_freq ? highest_freq : ATH12K_MAX_6GHZ_FREQ; +} + +static u32 +ath12k_wmi_get_lowest_5ghz_freq_from_range(struct ath12k_hw_mode_freq_range_arg *range) +{ + u32 lowest_freq = 0; + u8 phy_id; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + if ((!lowest_freq && range[phy_id].low_5ghz_freq) || + range[phy_id].low_5ghz_freq < lowest_freq) + lowest_freq = range[phy_id].low_5ghz_freq; + } + + return lowest_freq ? lowest_freq : ATH12K_MIN_5GHZ_FREQ; +} + +static void +ath12k_wmi_fill_upper_share_sbs_freq(struct ath12k_base *ab, + u16 sbs_range_sep, + struct ath12k_hw_mode_freq_range_arg *ref_freq) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *upper_sbs_freq_range; + u8 phy_id; + + upper_sbs_freq_range = + hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE]; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + upper_sbs_freq_range[phy_id].low_2ghz_freq = + ref_freq[phy_id].low_2ghz_freq; + upper_sbs_freq_range[phy_id].high_2ghz_freq = + ref_freq[phy_id].high_2ghz_freq; + + /* update for shared mac */ + if (upper_sbs_freq_range[phy_id].low_2ghz_freq) { + upper_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10; + upper_sbs_freq_range[phy_id].high_5ghz_freq = + ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq); + } else { + upper_sbs_freq_range[phy_id].low_5ghz_freq = + ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq); + upper_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep; + } + } +} + +static void +ath12k_wmi_fill_lower_share_sbs_freq(struct ath12k_base *ab, + u16 sbs_range_sep, + struct ath12k_hw_mode_freq_range_arg *ref_freq) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *lower_sbs_freq_range; + u8 phy_id; + + lower_sbs_freq_range = + hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE]; + + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + lower_sbs_freq_range[phy_id].low_2ghz_freq = + ref_freq[phy_id].low_2ghz_freq; + lower_sbs_freq_range[phy_id].high_2ghz_freq = + ref_freq[phy_id].high_2ghz_freq; + + /* update for shared mac */ + if (lower_sbs_freq_range[phy_id].low_2ghz_freq) { + lower_sbs_freq_range[phy_id].low_5ghz_freq = + ath12k_wmi_get_lowest_5ghz_freq_from_range(ref_freq); + lower_sbs_freq_range[phy_id].high_5ghz_freq = sbs_range_sep; + } else { + lower_sbs_freq_range[phy_id].low_5ghz_freq = sbs_range_sep + 10; + lower_sbs_freq_range[phy_id].high_5ghz_freq = + ath12k_wmi_get_highest_5ghz_freq_from_range(ref_freq); + } + } +} + +static const char *ath12k_wmi_hw_mode_to_str(enum ath12k_hw_mode hw_mode) +{ + static const char * const mode_str[] = { + [ATH12K_HW_MODE_SMM] = "SMM", + [ATH12K_HW_MODE_DBS] = "DBS", + [ATH12K_HW_MODE_SBS] = "SBS", + [ATH12K_HW_MODE_SBS_UPPER_SHARE] = "SBS_UPPER_SHARE", + [ATH12K_HW_MODE_SBS_LOWER_SHARE] = "SBS_LOWER_SHARE", + }; + + if (hw_mode >= ARRAY_SIZE(mode_str)) + return "Unknown"; + + return mode_str[hw_mode]; +} + +static void +ath12k_wmi_dump_freq_range_per_mac(struct ath12k_base *ab, + struct ath12k_hw_mode_freq_range_arg *freq_range, + enum ath12k_hw_mode hw_mode) +{ + u8 i; + + for (i = 0; i < MAX_RADIOS; i++) + if (freq_range[i].low_2ghz_freq || freq_range[i].low_5ghz_freq) + ath12k_dbg(ab, ATH12K_DBG_WMI, + "frequency range: %s(%d) mac %d 2 GHz [%d - %d] 5 GHz [%d - %d]", + ath12k_wmi_hw_mode_to_str(hw_mode), + hw_mode, i, + freq_range[i].low_2ghz_freq, + freq_range[i].high_2ghz_freq, + freq_range[i].low_5ghz_freq, + freq_range[i].high_5ghz_freq); +} + +static void ath12k_wmi_dump_freq_range(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_freq_range_arg *freq_range; + u8 i; + + for (i = ATH12K_HW_MODE_SMM; i < ATH12K_HW_MODE_MAX; i++) { + freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[i]; + ath12k_wmi_dump_freq_range_per_mac(ab, freq_range, i); + } +} + +static int ath12k_wmi_modify_sbs_freq(struct ath12k_base *ab, u8 phy_id) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *sbs_mac_range, *shared_mac_range; + struct ath12k_hw_mode_freq_range_arg *non_shared_range; + u8 shared_phy_id; + + sbs_mac_range = &hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][phy_id]; + + /* if SBS mac range has both 2.4 and 5 GHz ranges, i.e. shared phy_id + * keep the range as it is in SBS + */ + if (sbs_mac_range->low_2ghz_freq && sbs_mac_range->low_5ghz_freq) + return 0; + + if (sbs_mac_range->low_2ghz_freq && !sbs_mac_range->low_5ghz_freq) { + ath12k_err(ab, "Invalid DBS/SBS mode with only 2.4Ghz"); + ath12k_wmi_dump_freq_range_per_mac(ab, sbs_mac_range, ATH12K_HW_MODE_SBS); + return -EINVAL; + } + + non_shared_range = sbs_mac_range; + /* if SBS mac range has only 5 GHz then it's the non-shared phy, so + * modify the range as per the shared mac. + */ + shared_phy_id = phy_id ? 0 : 1; + shared_mac_range = + &hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS][shared_phy_id]; + + if (shared_mac_range->low_5ghz_freq > non_shared_range->low_5ghz_freq) { + ath12k_dbg(ab, ATH12K_DBG_WMI, "high 5 GHz shared"); + /* If the shared mac lower 5 GHz frequency is greater than + * non-shared mac lower 5 GHz frequency then the shared mac has + * high 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz high + * freq should be less than the shared mac's low 5 GHz freq. + */ + if (non_shared_range->high_5ghz_freq >= + shared_mac_range->low_5ghz_freq) + non_shared_range->high_5ghz_freq = + max_t(u32, shared_mac_range->low_5ghz_freq - 10, + non_shared_range->low_5ghz_freq); + } else if (shared_mac_range->high_5ghz_freq < + non_shared_range->high_5ghz_freq) { + ath12k_dbg(ab, ATH12K_DBG_WMI, "low 5 GHz shared"); + /* If the shared mac high 5 GHz frequency is less than + * non-shared mac high 5 GHz frequency then the shared mac has + * low 5 GHz shared with 2.4 GHz. So non-shared mac's 5 GHz low + * freq should be greater than the shared mac's high 5 GHz freq. + */ + if (shared_mac_range->high_5ghz_freq >= + non_shared_range->low_5ghz_freq) + non_shared_range->low_5ghz_freq = + min_t(u32, shared_mac_range->high_5ghz_freq + 10, + non_shared_range->high_5ghz_freq); + } else { + ath12k_warn(ab, "invalid SBS range with all 5 GHz shared"); + return -EINVAL; + } + + return 0; +} + +static void ath12k_wmi_update_sbs_freq_info(struct ath12k_base *ab) +{ + struct ath12k_hw_mode_info *hw_mode_info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *mac_range; + u16 sbs_range_sep; + u8 phy_id; + int ret; + + mac_range = hw_mode_info->freq_range_caps[ATH12K_HW_MODE_SBS]; + + /* If sbs_lower_band_end_freq has a value, then the frequency range + * will be split using that value. + */ + sbs_range_sep = ab->wmi_ab.sbs_lower_band_end_freq; + if (sbs_range_sep) { + ath12k_wmi_fill_upper_share_sbs_freq(ab, sbs_range_sep, + mac_range); + ath12k_wmi_fill_lower_share_sbs_freq(ab, sbs_range_sep, + mac_range); + /* Hardware specifies the range boundary with sbs_range_sep, + * (i.e. the boundary between 5 GHz high and 5 GHz low), + * reset the original one to make sure it will not get used. + */ + memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS); + return; + } + + /* If sbs_lower_band_end_freq is not set that means firmware will send one + * shared mac range and one non-shared mac range. so update that freq. + */ + for (phy_id = 0; phy_id < MAX_RADIOS; phy_id++) { + ret = ath12k_wmi_modify_sbs_freq(ab, phy_id); + if (ret) { + memset(mac_range, 0, sizeof(*mac_range) * MAX_RADIOS); + break; + } + } +} + +static void +ath12k_wmi_update_mac_freq_info(struct ath12k_base *ab, + enum wmi_host_hw_mode_config_type hw_config_type, + u32 phy_id, + struct ath12k_svc_ext_mac_phy_info *mac_cap) +{ + if (phy_id >= MAX_RADIOS) { + ath12k_err(ab, "mac more than two not supported: %d", phy_id); + return; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, + "hw_mode_cfg %d mac %d band 0x%x SBS cutoff freq %d 2 GHz [%d - %d] 5 GHz [%d - %d]", + hw_config_type, phy_id, mac_cap->supported_bands, + ab->wmi_ab.sbs_lower_band_end_freq, + mac_cap->hw_freq_range.low_2ghz_freq, + mac_cap->hw_freq_range.high_2ghz_freq, + mac_cap->hw_freq_range.low_5ghz_freq, + mac_cap->hw_freq_range.high_5ghz_freq); + + switch (hw_config_type) { + case WMI_HOST_HW_MODE_SINGLE: + if (phy_id) { + ath12k_dbg(ab, ATH12K_DBG_WMI, "mac phy 1 is not supported"); + break; + } + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SMM, phy_id); + break; + + case WMI_HOST_HW_MODE_DBS: + if (!ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS)) + ath12k_wmi_update_freq_info(ab, mac_cap, + ATH12K_HW_MODE_DBS, phy_id); + break; + case WMI_HOST_HW_MODE_DBS_SBS: + case WMI_HOST_HW_MODE_DBS_OR_SBS: + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_DBS, phy_id); + if (ab->wmi_ab.sbs_lower_band_end_freq || + mac_cap->hw_freq_range.low_5ghz_freq || + mac_cap->hw_freq_range.low_2ghz_freq) + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS, + phy_id); + + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_DBS)) + ath12k_wmi_update_dbs_freq_info(ab); + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS)) + ath12k_wmi_update_sbs_freq_info(ab); + break; + case WMI_HOST_HW_MODE_SBS: + case WMI_HOST_HW_MODE_SBS_PASSIVE: + ath12k_wmi_update_freq_info(ab, mac_cap, ATH12K_HW_MODE_SBS, phy_id); + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS)) + ath12k_wmi_update_sbs_freq_info(ab); + + break; + default: + break; + } +} + +static bool ath12k_wmi_sbs_range_present(struct ath12k_base *ab) +{ + if (ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS) || + (ab->wmi_ab.sbs_lower_band_end_freq && + ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_LOWER_SHARE) && + ath12k_wmi_all_phy_range_updated(ab, ATH12K_HW_MODE_SBS_UPPER_SHARE))) + return true; + + return false; +} + +static int ath12k_wmi_update_hw_mode_list(struct ath12k_base *ab) +{ + struct ath12k_svc_ext_info *svc_ext_info = &ab->wmi_ab.svc_ext_info; + struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info; + enum wmi_host_hw_mode_config_type hw_config_type; + struct ath12k_svc_ext_mac_phy_info *tmp; + bool dbs_mode = false, sbs_mode = false; + u32 i, j = 0; + + if (!svc_ext_info->num_hw_modes) { + ath12k_err(ab, "invalid number of hw modes"); + return -EINVAL; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, "updated HW mode list: num modes %d", + svc_ext_info->num_hw_modes); + + memset(info->freq_range_caps, 0, sizeof(info->freq_range_caps)); + + for (i = 0; i < svc_ext_info->num_hw_modes; i++) { + if (j >= ATH12K_MAX_MAC_PHY_CAP) + return -EINVAL; + + /* Update for MAC0 */ + tmp = &svc_ext_info->mac_phy_info[j++]; + hw_config_type = tmp->hw_mode_config_type; + ath12k_wmi_update_mac_freq_info(ab, hw_config_type, tmp->phy_id, tmp); + + /* SBS and DBS have dual MAC. Up to 2 MACs are considered. */ + if (hw_config_type == WMI_HOST_HW_MODE_DBS || + hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE || + hw_config_type == WMI_HOST_HW_MODE_SBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS) { + if (j >= ATH12K_MAX_MAC_PHY_CAP) + return -EINVAL; + /* Update for MAC1 */ + tmp = &svc_ext_info->mac_phy_info[j++]; + ath12k_wmi_update_mac_freq_info(ab, hw_config_type, + tmp->phy_id, tmp); + + if (hw_config_type == WMI_HOST_HW_MODE_DBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS) + dbs_mode = true; + + if (ath12k_wmi_sbs_range_present(ab) && + (hw_config_type == WMI_HOST_HW_MODE_SBS_PASSIVE || + hw_config_type == WMI_HOST_HW_MODE_SBS || + hw_config_type == WMI_HOST_HW_MODE_DBS_OR_SBS)) + sbs_mode = true; + } + } + + info->support_dbs = dbs_mode; + info->support_sbs = sbs_mode; + + ath12k_wmi_dump_freq_range(ab); + + return 0; +} + static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *ptr, void *data) @@ -5054,8 +5492,16 @@ static int ath12k_wmi_svc_rdy_ext2_parse(struct ath12k_base *ab, ath12k_dbg(ab, ATH12K_DBG_WMI, "sbs_lower_band_end_freq %u\n", ab->wmi_ab.sbs_lower_band_end_freq); + ret = ath12k_wmi_update_hw_mode_list(ab); + if (ret) { + ath12k_warn(ab, "failed to update hw mode list: %d\n", + ret); + return ret; + } + parse->dbs_or_sbs_cap_ext_done = true; } + break; default: break; diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index e69d53054f6d..f2a04a7bd91a 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -5083,6 +5083,32 @@ struct ath12k_svc_ext_info { struct ath12k_svc_ext_mac_phy_info mac_phy_info[ATH12K_MAX_MAC_PHY_CAP]; }; +/** + * enum ath12k_hw_mode - enum for host mode + * @ATH12K_HW_MODE_SMM: Single mac mode + * @ATH12K_HW_MODE_DBS: DBS mode + * @ATH12K_HW_MODE_SBS: SBS mode with either high share or low share + * @ATH12K_HW_MODE_SBS_UPPER_SHARE: Higher 5 GHz shared with 2.4 GHz + * @ATH12K_HW_MODE_SBS_LOWER_SHARE: Lower 5 GHz shared with 2.4 GHz + * @ATH12K_HW_MODE_MAX: Max, used to indicate invalid mode + */ +enum ath12k_hw_mode { + ATH12K_HW_MODE_SMM, + ATH12K_HW_MODE_DBS, + ATH12K_HW_MODE_SBS, + ATH12K_HW_MODE_SBS_UPPER_SHARE, + ATH12K_HW_MODE_SBS_LOWER_SHARE, + ATH12K_HW_MODE_MAX, +}; + +struct ath12k_hw_mode_info { + bool support_dbs:1; + bool support_sbs:1; + + struct ath12k_hw_mode_freq_range_arg freq_range_caps[ATH12K_HW_MODE_MAX] + [MAX_RADIOS]; +}; + struct ath12k_wmi_base { struct ath12k_base *ab; struct ath12k_wmi_pdev wmi[MAX_RADIOS]; @@ -5103,6 +5129,7 @@ struct ath12k_wmi_base { struct ath12k_svc_ext_info svc_ext_info; u32 sbs_lower_band_end_freq; + struct ath12k_hw_mode_info hw_mode_info; }; struct wmi_pdev_set_bios_interface_cmd { From c36f2cd628f9df6ffb19c23eedaa666440b8f5c5 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 22 May 2025 16:54:13 +0800 Subject: [PATCH 382/497] wifi: ath12k: support WMI_MLO_LINK_SET_ACTIVE_CMDID command Add WMI_MLO_LINK_SET_ACTIVE_CMDID command. This command allows host to send required link information to firmware such that firmware can make decision on activating/deactivating links in various scenarios. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250522-ath12k-sbs-dbs-v1-4-54a29e7a3a88@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 221 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/wmi.h | 116 +++++++++++++- 2 files changed, 336 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 034c18f6a2d2..6dbd272db1d5 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -10498,3 +10498,224 @@ int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar, return 0; } + +static int +ath12k_wmi_fill_disallowed_bmap(struct ath12k_base *ab, + struct wmi_disallowed_mlo_mode_bitmap_params *dislw_bmap, + struct wmi_mlo_link_set_active_arg *arg) +{ + struct wmi_ml_disallow_mode_bmap_arg *dislw_bmap_arg; + u8 i; + + if (arg->num_disallow_mode_comb > + ARRAY_SIZE(arg->disallow_bmap)) { + ath12k_warn(ab, "invalid num_disallow_mode_comb: %d", + arg->num_disallow_mode_comb); + return -EINVAL; + } + + dislw_bmap_arg = &arg->disallow_bmap[0]; + for (i = 0; i < arg->num_disallow_mode_comb; i++) { + dislw_bmap->tlv_header = + ath12k_wmi_tlv_cmd_hdr(0, sizeof(*dislw_bmap)); + dislw_bmap->disallowed_mode_bitmap = + cpu_to_le32(dislw_bmap_arg->disallowed_mode); + dislw_bmap->ieee_link_id_comb = + le32_encode_bits(dislw_bmap_arg->ieee_link_id[0], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[1], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[2], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3) | + le32_encode_bits(dislw_bmap_arg->ieee_link_id[3], + WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4); + + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d disallowed_mode %d ieee_link_id_comb 0x%x", + i, dislw_bmap_arg->disallowed_mode, + dislw_bmap_arg->ieee_link_id_comb); + dislw_bmap++; + dislw_bmap_arg++; + } + + return 0; +} + +int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab, + struct wmi_mlo_link_set_active_arg *arg) +{ + struct wmi_disallowed_mlo_mode_bitmap_params *disallowed_mode_bmap; + struct wmi_mlo_set_active_link_number_params *link_num_param; + u32 num_link_num_param = 0, num_vdev_bitmap = 0; + struct ath12k_wmi_base *wmi_ab = &ab->wmi_ab; + struct wmi_mlo_link_set_active_cmd *cmd; + u32 num_inactive_vdev_bitmap = 0; + u32 num_disallow_mode_comb = 0; + struct wmi_tlv *tlv; + struct sk_buff *skb; + __le32 *vdev_bitmap; + void *buf_ptr; + int i, ret; + u32 len; + + if (!arg->num_vdev_bitmap && !arg->num_link_entry) { + ath12k_warn(ab, "Invalid num_vdev_bitmap and num_link_entry"); + return -EINVAL; + } + + switch (arg->force_mode) { + case WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM: + case WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM: + num_link_num_param = arg->num_link_entry; + fallthrough; + case WMI_MLO_LINK_FORCE_MODE_ACTIVE: + case WMI_MLO_LINK_FORCE_MODE_INACTIVE: + case WMI_MLO_LINK_FORCE_MODE_NO_FORCE: + num_vdev_bitmap = arg->num_vdev_bitmap; + break; + case WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE: + num_vdev_bitmap = arg->num_vdev_bitmap; + num_inactive_vdev_bitmap = arg->num_inactive_vdev_bitmap; + break; + default: + ath12k_warn(ab, "Invalid force mode: %u", arg->force_mode); + return -EINVAL; + } + + num_disallow_mode_comb = arg->num_disallow_mode_comb; + len = sizeof(*cmd) + + TLV_HDR_SIZE + sizeof(*link_num_param) * num_link_num_param + + TLV_HDR_SIZE + sizeof(*vdev_bitmap) * num_vdev_bitmap + + TLV_HDR_SIZE + TLV_HDR_SIZE + TLV_HDR_SIZE + + TLV_HDR_SIZE + sizeof(*disallowed_mode_bmap) * num_disallow_mode_comb; + if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) + len += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap; + + skb = ath12k_wmi_alloc_skb(wmi_ab, len); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_mlo_link_set_active_cmd *)skb->data; + cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_LINK_SET_ACTIVE_CMD, + sizeof(*cmd)); + cmd->force_mode = cpu_to_le32(arg->force_mode); + cmd->reason = cpu_to_le32(arg->reason); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "mode %d reason %d num_link_num_param %d num_vdev_bitmap %d inactive %d num_disallow_mode_comb %d", + arg->force_mode, arg->reason, num_link_num_param, + num_vdev_bitmap, num_inactive_vdev_bitmap, + num_disallow_mode_comb); + + buf_ptr = skb->data + sizeof(*cmd); + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*link_num_param) * num_link_num_param); + buf_ptr += TLV_HDR_SIZE; + + if (num_link_num_param) { + cmd->ctrl_flags = + le32_encode_bits(arg->ctrl_flags.dync_force_link_num ? 1 : 0, + CRTL_F_DYNC_FORCE_LINK_NUM); + + link_num_param = buf_ptr; + for (i = 0; i < num_link_num_param; i++) { + link_num_param->tlv_header = + ath12k_wmi_tlv_cmd_hdr(0, sizeof(*link_num_param)); + link_num_param->num_of_link = + cpu_to_le32(arg->link_num[i].num_of_link); + link_num_param->vdev_type = + cpu_to_le32(arg->link_num[i].vdev_type); + link_num_param->vdev_subtype = + cpu_to_le32(arg->link_num[i].vdev_subtype); + link_num_param->home_freq = + cpu_to_le32(arg->link_num[i].home_freq); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d num_of_link %d vdev type %d subtype %d freq %d control_flags %d", + i, arg->link_num[i].num_of_link, + arg->link_num[i].vdev_type, + arg->link_num[i].vdev_subtype, + arg->link_num[i].home_freq, + __le32_to_cpu(cmd->ctrl_flags)); + link_num_param++; + } + + buf_ptr += sizeof(*link_num_param) * num_link_num_param; + } + + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, + sizeof(*vdev_bitmap) * num_vdev_bitmap); + buf_ptr += TLV_HDR_SIZE; + + if (num_vdev_bitmap) { + vdev_bitmap = buf_ptr; + for (i = 0; i < num_vdev_bitmap; i++) { + vdev_bitmap[i] = cpu_to_le32(arg->vdev_bitmap[i]); + ath12k_dbg(ab, ATH12K_DBG_WMI, "entry %d vdev_id_bitmap 0x%x", + i, arg->vdev_bitmap[i]); + } + + buf_ptr += sizeof(*vdev_bitmap) * num_vdev_bitmap; + } + + if (arg->force_mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) { + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, + sizeof(*vdev_bitmap) * + num_inactive_vdev_bitmap); + buf_ptr += TLV_HDR_SIZE; + + if (num_inactive_vdev_bitmap) { + vdev_bitmap = buf_ptr; + for (i = 0; i < num_inactive_vdev_bitmap; i++) { + vdev_bitmap[i] = + cpu_to_le32(arg->inactive_vdev_bitmap[i]); + ath12k_dbg(ab, ATH12K_DBG_WMI, + "entry %d inactive_vdev_id_bitmap 0x%x", + i, arg->inactive_vdev_bitmap[i]); + } + + buf_ptr += sizeof(*vdev_bitmap) * num_inactive_vdev_bitmap; + } + } else { + /* add empty vdev bitmap2 tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + } + + /* add empty ieee_link_id_bitmap tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + + /* add empty ieee_link_id_bitmap2 tlv */ + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, 0); + buf_ptr += TLV_HDR_SIZE; + + tlv = buf_ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*disallowed_mode_bmap) * + arg->num_disallow_mode_comb); + buf_ptr += TLV_HDR_SIZE; + + ret = ath12k_wmi_fill_disallowed_bmap(ab, buf_ptr, arg); + if (ret) + goto free_skb; + + ret = ath12k_wmi_cmd_send(&wmi_ab->wmi[0], skb, WMI_MLO_LINK_SET_ACTIVE_CMDID); + if (ret) { + ath12k_warn(ab, + "failed to send WMI_MLO_LINK_SET_ACTIVE_CMDID: %d\n", ret); + goto free_skb; + } + + ath12k_dbg(ab, ATH12K_DBG_WMI, "WMI mlo link set active cmd"); + + return ret; + +free_skb: + dev_kfree_skb(skb); + return ret; +} diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index f2a04a7bd91a..c640ffa180c8 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -1974,6 +1974,7 @@ enum wmi_tlv_tag { WMI_TAG_TPC_STATS_CTL_PWR_TABLE_EVENT, WMI_TAG_VDEV_SET_TPC_POWER_CMD = 0x3B5, WMI_TAG_VDEV_CH_POWER_INFO, + WMI_TAG_MLO_LINK_SET_ACTIVE_CMD = 0x3BE, WMI_TAG_EHT_RATE_SET = 0x3C4, WMI_TAG_DCS_AWGN_INT_TYPE = 0x3C5, WMI_TAG_MLO_TX_SEND_PARAMS, @@ -6061,6 +6062,118 @@ struct wmi_vdev_set_tpc_power_cmd { */ } __packed; +#define CRTL_F_DYNC_FORCE_LINK_NUM GENMASK(3, 2) + +struct wmi_mlo_link_set_active_cmd { + __le32 tlv_header; + __le32 force_mode; + __le32 reason; + __le32 use_ieee_link_id_bitmap; + struct ath12k_wmi_mac_addr_params ap_mld_mac_addr; + __le32 ctrl_flags; +} __packed; + +struct wmi_mlo_set_active_link_number_params { + __le32 tlv_header; + __le32 num_of_link; + __le32 vdev_type; + __le32 vdev_subtype; + __le32 home_freq; +} __packed; + +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_1 GENMASK(7, 0) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_2 GENMASK(15, 8) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_3 GENMASK(23, 16) +#define WMI_DISALW_MLO_MODE_BMAP_IEEE_LINK_ID_COMB_4 GENMASK(31, 24) + +struct wmi_disallowed_mlo_mode_bitmap_params { + __le32 tlv_header; + __le32 disallowed_mode_bitmap; + __le32 ieee_link_id_comb; +} __packed; + +enum wmi_mlo_link_force_mode { + WMI_MLO_LINK_FORCE_MODE_ACTIVE = 1, + WMI_MLO_LINK_FORCE_MODE_INACTIVE = 2, + WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM = 3, + WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM = 4, + WMI_MLO_LINK_FORCE_MODE_NO_FORCE = 5, + WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE = 6, + WMI_MLO_LINK_FORCE_MODE_NON_FORCE_UPDATE = 7, +}; + +enum wmi_mlo_link_force_reason { + WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT = 1, + WMI_MLO_LINK_FORCE_REASON_NEW_DISCONNECT = 2, + WMI_MLO_LINK_FORCE_REASON_LINK_REMOVAL = 3, + WMI_MLO_LINK_FORCE_REASON_TDLS = 4, + WMI_MLO_LINK_FORCE_REASON_REVERT_FAILURE = 5, + WMI_MLO_LINK_FORCE_REASON_LINK_DELETE = 6, + WMI_MLO_LINK_FORCE_REASON_SINGLE_LINK_EMLSR_OP = 7, +}; + +struct wmi_mlo_link_num_arg { + u32 num_of_link; + u32 vdev_type; + u32 vdev_subtype; + u32 home_freq; +}; + +struct wmi_mlo_control_flags_arg { + bool overwrite_force_active_bitmap; + bool overwrite_force_inactive_bitmap; + bool dync_force_link_num; + bool post_re_evaluate; + u8 post_re_evaluate_loops; + bool dont_reschedule_workqueue; +}; + +struct wmi_ml_link_force_cmd_arg { + u8 ap_mld_mac_addr[ETH_ALEN]; + u16 ieee_link_id_bitmap; + u16 ieee_link_id_bitmap2; + u8 link_num; +}; + +struct wmi_ml_disallow_mode_bmap_arg { + u32 disallowed_mode; + union { + u32 ieee_link_id_comb; + u8 ieee_link_id[4]; + }; +}; + +/* maximum size of link number param array + * for MLO link set active command + */ +#define WMI_MLO_LINK_NUM_SZ 2 + +/* maximum size of vdev bitmap array for + * MLO link set active command + */ +#define WMI_MLO_VDEV_BITMAP_SZ 2 + +/* Max number of disallowed bitmap combination + * sent to firmware + */ +#define WMI_ML_MAX_DISALLOW_BMAP_COMB 4 + +struct wmi_mlo_link_set_active_arg { + enum wmi_mlo_link_force_mode force_mode; + enum wmi_mlo_link_force_reason reason; + u32 num_link_entry; + u32 num_vdev_bitmap; + u32 num_inactive_vdev_bitmap; + struct wmi_mlo_link_num_arg link_num[WMI_MLO_LINK_NUM_SZ]; + u32 vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ]; + u32 inactive_vdev_bitmap[WMI_MLO_VDEV_BITMAP_SZ]; + struct wmi_mlo_control_flags_arg ctrl_flags; + bool use_ieee_link_id; + struct wmi_ml_link_force_cmd_arg force_cmd; + u32 num_disallow_mode_comb; + struct wmi_ml_disallow_mode_bmap_arg disallow_bmap[WMI_ML_MAX_DISALLOW_BMAP_COMB]; +}; + void ath12k_wmi_init_qcn9274(struct ath12k_base *ab, struct ath12k_wmi_resource_config_arg *config); void ath12k_wmi_init_wcn7850(struct ath12k_base *ab, @@ -6259,5 +6372,6 @@ bool ath12k_wmi_supports_6ghz_cc_ext(struct ath12k *ar); int ath12k_wmi_send_vdev_set_tpc_power(struct ath12k *ar, u32 vdev_id, struct ath12k_reg_tpc_power_info *param); - +int ath12k_wmi_send_mlo_link_set_active_cmd(struct ath12k_base *ab, + struct wmi_mlo_link_set_active_arg *param); #endif From 2296038fd35a6e57af77496affe9044293ed2947 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 22 May 2025 16:54:14 +0800 Subject: [PATCH 383/497] wifi: ath12k: update link active in case two links fall on the same MAC In case of two links established on the same device in an ML connection, depending on device's hardware mode capability, it is possible that both links fall on the same MAC. Currently, no specific action is taken to address this but just keep both links active. However this would result in lower throughput compared to even one link, because switching between these two links on the resulted MAC significantly impacts throughput. Check if both links fall in the frequency range of a single MAC. If so, send WMI_MLO_LINK_SET_ACTIVE_CMDID command to firmware such that firmware can deactivate one of them. Note the decision of which link getting deactivated is made by firmware, host only sends the vdev lists. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250522-ath12k-sbs-dbs-v1-5-54a29e7a3a88@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 227 +++++++++++++++++++++++++- drivers/net/wireless/ath/ath12k/mac.h | 2 + 2 files changed, 228 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 88b59f3ff87a..2fae4b01ec90 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5890,6 +5890,211 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, return ret; } +static bool ath12k_mac_is_freq_on_mac(struct ath12k_hw_mode_freq_range_arg *freq_range, + u32 freq, u8 mac_id) +{ + return (freq >= freq_range[mac_id].low_2ghz_freq && + freq <= freq_range[mac_id].high_2ghz_freq) || + (freq >= freq_range[mac_id].low_5ghz_freq && + freq <= freq_range[mac_id].high_5ghz_freq); +} + +static bool +ath12k_mac_2_freq_same_mac_in_freq_range(struct ath12k_base *ab, + struct ath12k_hw_mode_freq_range_arg *freq_range, + u32 freq_link1, u32 freq_link2) +{ + u8 i; + + for (i = 0; i < MAX_RADIOS; i++) { + if (ath12k_mac_is_freq_on_mac(freq_range, freq_link1, i) && + ath12k_mac_is_freq_on_mac(freq_range, freq_link2, i)) + return true; + } + + return false; +} + +static bool ath12k_mac_is_hw_dbs_capable(struct ath12k_base *ab) +{ + return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT, + ab->wmi_ab.svc_map) && + ab->wmi_ab.hw_mode_info.support_dbs; +} + +static bool ath12k_mac_2_freq_same_mac_in_dbs(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + struct ath12k_hw_mode_freq_range_arg *freq_range; + + if (!ath12k_mac_is_hw_dbs_capable(ab)) + return true; + + freq_range = ab->wmi_ab.hw_mode_info.freq_range_caps[ATH12K_HW_MODE_DBS]; + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, freq_range, + freq_link1, freq_link2); +} + +static bool ath12k_mac_is_hw_sbs_capable(struct ath12k_base *ab) +{ + return test_bit(WMI_TLV_SERVICE_DUAL_BAND_SIMULTANEOUS_SUPPORT, + ab->wmi_ab.svc_map) && + ab->wmi_ab.hw_mode_info.support_sbs; +} + +static bool ath12k_mac_2_freq_same_mac_in_sbs(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + struct ath12k_hw_mode_info *info = &ab->wmi_ab.hw_mode_info; + struct ath12k_hw_mode_freq_range_arg *sbs_uppr_share; + struct ath12k_hw_mode_freq_range_arg *sbs_low_share; + struct ath12k_hw_mode_freq_range_arg *sbs_range; + + if (!ath12k_mac_is_hw_sbs_capable(ab)) + return true; + + if (ab->wmi_ab.sbs_lower_band_end_freq) { + sbs_uppr_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_UPPER_SHARE]; + sbs_low_share = info->freq_range_caps[ATH12K_HW_MODE_SBS_LOWER_SHARE]; + + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_low_share, + freq_link1, freq_link2) || + ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_uppr_share, + freq_link1, freq_link2); + } + + sbs_range = info->freq_range_caps[ATH12K_HW_MODE_SBS]; + return ath12k_mac_2_freq_same_mac_in_freq_range(ab, sbs_range, + freq_link1, freq_link2); +} + +static bool ath12k_mac_freqs_on_same_mac(struct ath12k_base *ab, + u32 freq_link1, u32 freq_link2) +{ + return ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_link1, freq_link2) && + ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_link1, freq_link2); +} + +static int ath12k_mac_mlo_sta_set_link_active(struct ath12k_base *ab, + enum wmi_mlo_link_force_reason reason, + enum wmi_mlo_link_force_mode mode, + u8 *mlo_vdev_id_lst, + u8 num_mlo_vdev, + u8 *mlo_inactive_vdev_lst, + u8 num_mlo_inactive_vdev) +{ + struct wmi_mlo_link_set_active_arg param = {0}; + u32 entry_idx, entry_offset, vdev_idx; + u8 vdev_id; + + param.reason = reason; + param.force_mode = mode; + + for (vdev_idx = 0; vdev_idx < num_mlo_vdev; vdev_idx++) { + vdev_id = mlo_vdev_id_lst[vdev_idx]; + entry_idx = vdev_id / 32; + entry_offset = vdev_id % 32; + if (entry_idx >= WMI_MLO_LINK_NUM_SZ) { + ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d", + entry_idx, num_mlo_vdev, vdev_id); + return -EINVAL; + } + param.vdev_bitmap[entry_idx] |= 1 << entry_offset; + /* update entry number if entry index changed */ + if (param.num_vdev_bitmap < entry_idx + 1) + param.num_vdev_bitmap = entry_idx + 1; + } + + ath12k_dbg(ab, ATH12K_DBG_MAC, + "num_vdev_bitmap %d vdev_bitmap[0] = 0x%x, vdev_bitmap[1] = 0x%x", + param.num_vdev_bitmap, param.vdev_bitmap[0], param.vdev_bitmap[1]); + + if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_INACTIVE) { + for (vdev_idx = 0; vdev_idx < num_mlo_inactive_vdev; vdev_idx++) { + vdev_id = mlo_inactive_vdev_lst[vdev_idx]; + entry_idx = vdev_id / 32; + entry_offset = vdev_id % 32; + if (entry_idx >= WMI_MLO_LINK_NUM_SZ) { + ath12k_warn(ab, "Invalid entry_idx %d num_mlo_vdev %d vdev %d", + entry_idx, num_mlo_inactive_vdev, vdev_id); + return -EINVAL; + } + param.inactive_vdev_bitmap[entry_idx] |= 1 << entry_offset; + /* update entry number if entry index changed */ + if (param.num_inactive_vdev_bitmap < entry_idx + 1) + param.num_inactive_vdev_bitmap = entry_idx + 1; + } + + ath12k_dbg(ab, ATH12K_DBG_MAC, + "num_vdev_bitmap %d inactive_vdev_bitmap[0] = 0x%x, inactive_vdev_bitmap[1] = 0x%x", + param.num_inactive_vdev_bitmap, + param.inactive_vdev_bitmap[0], + param.inactive_vdev_bitmap[1]); + } + + if (mode == WMI_MLO_LINK_FORCE_MODE_ACTIVE_LINK_NUM || + mode == WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM) { + param.num_link_entry = 1; + param.link_num[0].num_of_link = num_mlo_vdev - 1; + } + + return ath12k_wmi_send_mlo_link_set_active_cmd(ab, ¶m); +} + +static int ath12k_mac_mlo_sta_update_link_active(struct ath12k_base *ab, + struct ieee80211_hw *hw, + struct ath12k_vif *ahvif) +{ + u8 mlo_vdev_id_lst[IEEE80211_MLD_MAX_NUM_LINKS] = {0}; + u32 mlo_freq_list[IEEE80211_MLD_MAX_NUM_LINKS] = {0}; + unsigned long links = ahvif->links_map; + enum wmi_mlo_link_force_reason reason; + struct ieee80211_chanctx_conf *conf; + enum wmi_mlo_link_force_mode mode; + struct ieee80211_bss_conf *info; + struct ath12k_link_vif *arvif; + u8 num_mlo_vdev = 0; + u8 link_id; + + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + /* make sure vdev is created on this device */ + if (!arvif || !arvif->is_created || arvif->ar->ab != ab) + continue; + + info = ath12k_mac_get_link_bss_conf(arvif); + conf = wiphy_dereference(hw->wiphy, info->chanctx_conf); + mlo_freq_list[num_mlo_vdev] = conf->def.chan->center_freq; + + mlo_vdev_id_lst[num_mlo_vdev] = arvif->vdev_id; + num_mlo_vdev++; + } + + /* It is not allowed to activate more links than a single device + * supported. Something goes wrong if we reach here. + */ + if (num_mlo_vdev > ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) { + WARN_ON_ONCE(1); + return -EINVAL; + } + + /* if 2 links are established and both link channels fall on the + * same hardware MAC, send command to firmware to deactivate one + * of them. + */ + if (num_mlo_vdev == 2 && + ath12k_mac_freqs_on_same_mac(ab, mlo_freq_list[0], + mlo_freq_list[1])) { + mode = WMI_MLO_LINK_FORCE_MODE_INACTIVE_LINK_NUM; + reason = WMI_MLO_LINK_FORCE_REASON_NEW_CONNECT; + return ath12k_mac_mlo_sta_set_link_active(ab, reason, mode, + mlo_vdev_id_lst, num_mlo_vdev, + NULL, 0); + } + + return 0; +} + static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -5899,10 +6104,12 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); struct ath12k_hw *ah = ath12k_hw_to_ah(hw); + struct ath12k_base *prev_ab = NULL, *ab; struct ath12k_link_vif *arvif; struct ath12k_link_sta *arsta; unsigned long valid_links; - u8 link_id = 0; + u8 link_id = 0, i; + struct ath12k *ar; int ret; lockdep_assert_wiphy(hw->wiphy); @@ -5997,6 +6204,24 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, } } + if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION && + old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) { + for_each_ar(ah, ar, i) { + ab = ar->ab; + if (prev_ab == ab) + continue; + + ret = ath12k_mac_mlo_sta_update_link_active(ab, hw, ahvif); + if (ret) { + ath12k_warn(ab, + "failed to update link active state on connect %d\n", + ret); + goto exit; + } + + prev_ab = ab; + } + } /* IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST: * Remove the station from driver (handle ML sta here since that * needs special handling. Normal sta will be handled in generic diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index e6e74b45bfa4..cc81b1f5680f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -54,6 +54,8 @@ struct ath12k_generic_iter { #define ATH12K_DEFAULT_SCAN_LINK IEEE80211_MLD_MAX_NUM_LINKS #define ATH12K_NUM_MAX_LINKS (IEEE80211_MLD_MAX_NUM_LINKS + 1) +#define ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE 2 + enum ath12k_supported_bw { ATH12K_BW_20 = 0, ATH12K_BW_40 = 1, From d9dbc6b8b94ab41fb8b9dbb5a7be024029d46309 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 22 May 2025 16:54:15 +0800 Subject: [PATCH 384/497] wifi: ath12k: don't activate more links than firmware supports In case of ML connection, currently all useful links are activated at ASSOC stage: ieee80211_set_active_links(vif, ieee80211_vif_usable_links(vif)) this results in firmware crash when the number of links activated on the same device is more than supported. Since firmware supports activating at most 2 links for a ML connection, to avoid firmware crash, host needs to select 2 links out of the useful links. As the assoc link has already been chosen, the question becomes how to determine partner links. A straightforward principle applied here is that the resulted combination should achieve the best throughput. For that purpose, ideally various factors like bandwidth, RSSI etc should be considered. But that would be too complicate. To make it easy, the choice is to only take hardware modes into consideration. The SBS (single band simultaneously) mode frequency range covers 5 GHz and 6 GHz bands. In this mode, the two individual MACs are both active, with one working on 5g-high band and the other on 5g-low band (from hardware perspective 5 GHz and 6 GHz bands are referred to as a 'large' single 5 GHz band). The DBS (dual band simultaneously) mode covers 2 GHz band and the 'large' 5 GHz band, with one MAC working on 2 GHz band and the other working on 5 GHz band or 6 GHz band. Since 5,6 GHz bands could provide higher bandwidth than 2 GHz band, the preference is given to SBS mode. Other hardware modes results in only one working MAC at any given time, so it is chosen only when both SBS are DBS are not possible. For each hardware mode, if there are more than one partner candidate, just choose the first one. For now only single device MLO case is handled as it is easy. Other cases could be addressed in the future. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20250522-ath12k-sbs-dbs-v1-6-54a29e7a3a88@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 6 ++ drivers/net/wireless/ath/ath12k/mac.c | 137 ++++++++++++++++++++++++- 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 941db6e49d6e..efbd6e8ce275 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -601,6 +601,12 @@ struct ath12k_sta { #define ATH12K_NUM_CHANS 101 #define ATH12K_MAX_5GHZ_CHAN 173 +static inline bool ath12k_is_2ghz_channel_freq(u32 freq) +{ + return freq >= ATH12K_MIN_2GHZ_FREQ && + freq <= ATH12K_MAX_2GHZ_FREQ; +} + enum ath12k_hw_state { ATH12K_HW_STATE_OFF, ATH12K_HW_STATE_ON, diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 2fae4b01ec90..b5b31cd8de99 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6095,6 +6095,122 @@ static int ath12k_mac_mlo_sta_update_link_active(struct ath12k_base *ab, return 0; } +static bool ath12k_mac_are_sbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2) +{ + if (!ath12k_mac_is_hw_sbs_capable(ab)) + return false; + + if (ath12k_is_2ghz_channel_freq(freq_1) || + ath12k_is_2ghz_channel_freq(freq_2)) + return false; + + return !ath12k_mac_2_freq_same_mac_in_sbs(ab, freq_1, freq_2); +} + +static bool ath12k_mac_are_dbs_chan(struct ath12k_base *ab, u32 freq_1, u32 freq_2) +{ + if (!ath12k_mac_is_hw_dbs_capable(ab)) + return false; + + return !ath12k_mac_2_freq_same_mac_in_dbs(ab, freq_1, freq_2); +} + +static int ath12k_mac_select_links(struct ath12k_base *ab, + struct ieee80211_vif *vif, + struct ieee80211_hw *hw, + u16 *selected_links) +{ + unsigned long useful_links = ieee80211_vif_usable_links(vif); + struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + u8 num_useful_links = hweight_long(useful_links); + struct ieee80211_chanctx_conf *chanctx; + struct ath12k_link_vif *assoc_arvif; + u32 assoc_link_freq, partner_freq; + u16 sbs_links = 0, dbs_links = 0; + struct ieee80211_bss_conf *info; + struct ieee80211_channel *chan; + struct ieee80211_sta *sta; + struct ath12k_sta *ahsta; + u8 link_id; + + /* activate all useful links if less than max supported */ + if (num_useful_links <= ATH12K_NUM_MAX_ACTIVE_LINKS_PER_DEVICE) { + *selected_links = useful_links; + return 0; + } + + /* only in station mode we can get here, so it's safe + * to use ap_addr + */ + rcu_read_lock(); + sta = ieee80211_find_sta(vif, vif->cfg.ap_addr); + if (!sta) { + rcu_read_unlock(); + ath12k_warn(ab, "failed to find sta with addr %pM\n", vif->cfg.ap_addr); + return -EINVAL; + } + + ahsta = ath12k_sta_to_ahsta(sta); + assoc_arvif = wiphy_dereference(hw->wiphy, ahvif->link[ahsta->assoc_link_id]); + info = ath12k_mac_get_link_bss_conf(assoc_arvif); + chanctx = rcu_dereference(info->chanctx_conf); + assoc_link_freq = chanctx->def.chan->center_freq; + rcu_read_unlock(); + ath12k_dbg(ab, ATH12K_DBG_MAC, "assoc link %u freq %u\n", + assoc_arvif->link_id, assoc_link_freq); + + /* assoc link is already activated and has to be kept active, + * only need to select a partner link from others. + */ + useful_links &= ~BIT(assoc_arvif->link_id); + for_each_set_bit(link_id, &useful_links, IEEE80211_MLD_MAX_NUM_LINKS) { + info = wiphy_dereference(hw->wiphy, vif->link_conf[link_id]); + if (!info) { + ath12k_warn(ab, "failed to get link info for link: %u\n", + link_id); + return -ENOLINK; + } + + chan = info->chanreq.oper.chan; + if (!chan) { + ath12k_warn(ab, "failed to get chan for link: %u\n", link_id); + return -EINVAL; + } + + partner_freq = chan->center_freq; + if (ath12k_mac_are_sbs_chan(ab, assoc_link_freq, partner_freq)) { + sbs_links |= BIT(link_id); + ath12k_dbg(ab, ATH12K_DBG_MAC, "new SBS link %u freq %u\n", + link_id, partner_freq); + continue; + } + + if (ath12k_mac_are_dbs_chan(ab, assoc_link_freq, partner_freq)) { + dbs_links |= BIT(link_id); + ath12k_dbg(ab, ATH12K_DBG_MAC, "new DBS link %u freq %u\n", + link_id, partner_freq); + continue; + } + + ath12k_dbg(ab, ATH12K_DBG_MAC, "non DBS/SBS link %u freq %u\n", + link_id, partner_freq); + } + + /* choose the first candidate no matter how many is in the list */ + if (sbs_links) + link_id = __ffs(sbs_links); + else if (dbs_links) + link_id = __ffs(dbs_links); + else + link_id = ffs(useful_links) - 1; + + ath12k_dbg(ab, ATH12K_DBG_MAC, "select partner link %u\n", link_id); + + *selected_links = BIT(assoc_arvif->link_id) | BIT(link_id); + + return 0; +} + static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -6108,6 +6224,7 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, struct ath12k_link_vif *arvif; struct ath12k_link_sta *arsta; unsigned long valid_links; + u16 selected_links = 0; u8 link_id = 0, i; struct ath12k *ar; int ret; @@ -6179,8 +6296,24 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, * about to move to the associated state. */ if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION && - old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) - ieee80211_set_active_links(vif, ieee80211_vif_usable_links(vif)); + old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) { + /* TODO: for now only do link selection for single device + * MLO case. Other cases would be handled in the future. + */ + ab = ah->radio[0].ab; + if (ab->ag->num_devices == 1) { + ret = ath12k_mac_select_links(ab, vif, hw, &selected_links); + if (ret) { + ath12k_warn(ab, + "failed to get selected links: %d\n", ret); + goto exit; + } + } else { + selected_links = ieee80211_vif_usable_links(vif); + } + + ieee80211_set_active_links(vif, selected_links); + } /* Handle all the other state transitions in generic way */ valid_links = ahsta->links_map; From a48a931a32f88157db2baa31c10738a7fe660e93 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 12 Jun 2025 09:31:49 +0800 Subject: [PATCH 385/497] wifi: ath12k: fix documentation on firmware stats Regarding the firmware stats events handling, the comment in ath12k_mac_get_fw_stats() says host determines whether all events have been received based on 'end' tag in TLV. This is wrong as there is no such tag at all, actually host makes the decision totally by itself based on the stats type and active pdev/vdev counts etc. Fix it to correctly reflect the logic. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284.1-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Tested-on: QCN9274 hw2.0 WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1 Signed-off-by: Baochen Qiang Link: https://patch.msgid.link/20250612-ath12k-fw-fixes-v1-1-12f594f3b857@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index b5b31cd8de99..581b6c9c84ea 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4400,9 +4400,8 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, /* Firmware sends WMI_UPDATE_STATS_EVENTID back-to-back * when stats data buffer limit is reached. fw_stats_complete * is completed once host receives first event from firmware, but - * still end might not be marked in the TLV. - * Below loop is to confirm that firmware completed sending all the event - * and fw_stats_done is marked true when end is marked in the TLV. + * still there could be more events following. Below loop is to wait + * until firmware completes sending all the events. */ for (;;) { if (time_after(jiffies, timeout)) From 9a353a4a11a475578be3c02dd17e70afe3a4a7f6 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 12 Jun 2025 09:31:50 +0800 Subject: [PATCH 386/497] wifi: ath12k: avoid burning CPU while waiting for firmware stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ath12k_mac_get_fw_stats() is busy polling fw_stats_done flag while waiting firmware finishing sending all events. This is not good as CPU is monopolized and kept burning during the wait. Change to the completion mechanism to fix it. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284.1-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Tested-on: QCN9274 hw2.0 WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1 Fixes: e367c924768b ("wifi: ath12k: Request vdev stats from firmware") Reported-by: Grégoire Stein Closes: https://lore.kernel.org/ath12k/AS8P190MB120575BBB25FCE697CD7D4988763A@AS8P190MB1205.EURP190.PROD.OUTLOOK.COM/ Signed-off-by: Baochen Qiang Tested-by: Grégoire Stein Link: https://patch.msgid.link/20250612-ath12k-fw-fixes-v1-2-12f594f3b857@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 2 +- drivers/net/wireless/ath/ath12k/core.h | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 27 ++++++++------------------ drivers/net/wireless/ath/ath12k/wmi.c | 7 ++++--- 4 files changed, 14 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index ebc0560d40e3..fbc62209086f 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1216,6 +1216,7 @@ void ath12k_fw_stats_init(struct ath12k *ar) INIT_LIST_HEAD(&ar->fw_stats.pdevs); INIT_LIST_HEAD(&ar->fw_stats.bcn); init_completion(&ar->fw_stats_complete); + init_completion(&ar->fw_stats_done); } void ath12k_fw_stats_free(struct ath12k_fw_stats *stats) @@ -1228,7 +1229,6 @@ void ath12k_fw_stats_free(struct ath12k_fw_stats *stats) void ath12k_fw_stats_reset(struct ath12k *ar) { spin_lock_bh(&ar->data_lock); - ar->fw_stats.fw_stats_done = false; ath12k_fw_stats_free(&ar->fw_stats); spin_unlock_bh(&ar->data_lock); } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index efbd6e8ce275..2b2e0c16b64e 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -632,7 +632,6 @@ struct ath12k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; - bool fw_stats_done; }; struct ath12k_dbg_htt_stats { @@ -812,6 +811,7 @@ struct ath12k { bool regdom_set_by_user; struct completion fw_stats_complete; + struct completion fw_stats_done; struct completion mlo_setup_done; u32 mlo_setup_status; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 581b6c9c84ea..59ec422992d3 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4360,7 +4360,7 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, { struct ath12k_base *ab = ar->ab; struct ath12k_hw *ah = ath12k_ar_to_ah(ar); - unsigned long timeout, time_left; + unsigned long time_left; int ret; guard(mutex)(&ah->hw_mutex); @@ -4368,19 +4368,13 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, if (ah->state != ATH12K_HW_STATE_ON) return -ENETDOWN; - /* FW stats can get split when exceeding the stats data buffer limit. - * In that case, since there is no end marking for the back-to-back - * received 'update stats' event, we keep a 3 seconds timeout in case, - * fw_stats_done is not marked yet - */ - timeout = jiffies + msecs_to_jiffies(3 * 1000); ath12k_fw_stats_reset(ar); reinit_completion(&ar->fw_stats_complete); + reinit_completion(&ar->fw_stats_done); ret = ath12k_wmi_send_stats_request_cmd(ar, param->stats_id, param->vdev_id, param->pdev_id); - if (ret) { ath12k_warn(ab, "failed to request fw stats: %d\n", ret); return ret; @@ -4391,7 +4385,6 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, param->pdev_id, param->vdev_id, param->stats_id); time_left = wait_for_completion_timeout(&ar->fw_stats_complete, 1 * HZ); - if (!time_left) { ath12k_warn(ab, "time out while waiting for get fw stats\n"); return -ETIMEDOUT; @@ -4400,19 +4393,15 @@ int ath12k_mac_get_fw_stats(struct ath12k *ar, /* Firmware sends WMI_UPDATE_STATS_EVENTID back-to-back * when stats data buffer limit is reached. fw_stats_complete * is completed once host receives first event from firmware, but - * still there could be more events following. Below loop is to wait + * still there could be more events following. Below is to wait * until firmware completes sending all the events. */ - for (;;) { - if (time_after(jiffies, timeout)) - break; - spin_lock_bh(&ar->data_lock); - if (ar->fw_stats.fw_stats_done) { - spin_unlock_bh(&ar->data_lock); - break; - } - spin_unlock_bh(&ar->data_lock); + time_left = wait_for_completion_timeout(&ar->fw_stats_done, 3 * HZ); + if (!time_left) { + ath12k_warn(ab, "time out while waiting for fw stats done\n"); + return -ETIMEDOUT; } + return 0; } diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 6dbd272db1d5..e8323581a5af 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -8192,11 +8192,12 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, &ar->fw_stats.vdevs); if (is_end) { - ar->fw_stats.fw_stats_done = true; + complete(&ar->fw_stats_done); num_vdev = 0; } return; } + if (stats->stats_id == WMI_REQUEST_BCN_STAT) { if (list_empty(&stats->bcn)) { ath12k_warn(ab, "empty beacon stats"); @@ -8211,7 +8212,7 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, &ar->fw_stats.bcn); if (is_end) { - ar->fw_stats.fw_stats_done = true; + complete(&ar->fw_stats_done); num_bcn = 0; } } @@ -8249,7 +8250,7 @@ static void ath12k_update_stats_event(struct ath12k_base *ab, struct sk_buff *sk /* Handle WMI_REQUEST_PDEV_STAT status update */ if (stats.stats_id == WMI_REQUEST_PDEV_STAT) { list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs); - ar->fw_stats.fw_stats_done = true; + complete(&ar->fw_stats_done); goto complete; } From ac7b8ff7839ded757806317ab8979be844766770 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 12 Jun 2025 09:31:51 +0800 Subject: [PATCH 387/497] wifi: ath12k: don't use static variables in ath12k_wmi_fw_stats_process() Currently ath12k_wmi_fw_stats_process() is using static variables to count firmware stat events. Taking num_vdev as an example, if for whatever reason (say ar->num_started_vdevs is 0 or firmware bug etc.) the following condition (++num_vdev) == total_vdevs_started is not met, is_end is not set thus num_vdev won't be cleared. Next time when firmware stats is requested again, even if everything is working fine, failure is expected due to the condition above will never be satisfied. The same applies to num_bcn as well. Change to use non-static counters and reset them each time before firmware stats is requested. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284.1-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Tested-on: QCN9274 hw2.0 WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1 Fixes: e367c924768b ("wifi: ath12k: Request vdev stats from firmware") Signed-off-by: Baochen Qiang Link: https://patch.msgid.link/20250612-ath12k-fw-fixes-v1-3-12f594f3b857@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 2 ++ drivers/net/wireless/ath/ath12k/core.h | 2 ++ drivers/net/wireless/ath/ath12k/wmi.c | 14 +++++--------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index fbc62209086f..89ae80934b30 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1230,6 +1230,8 @@ void ath12k_fw_stats_reset(struct ath12k *ar) { spin_lock_bh(&ar->data_lock); ath12k_fw_stats_free(&ar->fw_stats); + ar->fw_stats.num_vdev_recvd = 0; + ar->fw_stats.num_bcn_recvd = 0; spin_unlock_bh(&ar->data_lock); } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 2b2e0c16b64e..7bcd9c70309f 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -632,6 +632,8 @@ struct ath12k_fw_stats { struct list_head pdevs; struct list_head vdevs; struct list_head bcn; + u32 num_vdev_recvd; + u32 num_bcn_recvd; }; struct ath12k_dbg_htt_stats { diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index e8323581a5af..533e4ddb9a50 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -8166,7 +8166,6 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, struct ath12k_base *ab = ar->ab; struct ath12k_pdev *pdev; bool is_end; - static unsigned int num_vdev, num_bcn; size_t total_vdevs_started = 0; int i; @@ -8186,15 +8185,14 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, } rcu_read_unlock(); - is_end = ((++num_vdev) == total_vdevs_started); + is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_vdev = 0; - } + return; } @@ -8206,15 +8204,13 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++num_bcn) == ar->num_started_vdevs); + is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); - if (is_end) { + if (is_end) complete(&ar->fw_stats_done); - num_bcn = 0; - } } } From ad5e9178cec589bf3af589dc43e638e5a5bf56fa Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Thu, 12 Jun 2025 09:31:52 +0800 Subject: [PATCH 388/497] wifi: ath12k: don't wait when there is no vdev started For WMI_REQUEST_VDEV_STAT request, firmware might split response into multiple events dut to buffer limit, hence currently in ath12k_wmi_fw_stats_process() host waits until all events received. In case there is no vdev started, this results in that below condition would never get satisfied ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started) consequently the requestor would be blocked until time out. The same applies to WMI_REQUEST_BCN_STAT request as well due to: ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs) Change to check the number of started vdev first: if it is zero, finish directly; if not, follow the old way. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.1.c5-00284.1-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Tested-on: QCN9274 hw2.0 WLAN.WBE.1.5-01651-QCAHKSWPL_SILICONZ-1 Fixes: e367c924768b ("wifi: ath12k: Request vdev stats from firmware") Signed-off-by: Baochen Qiang Link: https://patch.msgid.link/20250612-ath12k-fw-fixes-v1-4-12f594f3b857@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 533e4ddb9a50..465f877fc0fb 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -8165,7 +8165,7 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, { struct ath12k_base *ab = ar->ab; struct ath12k_pdev *pdev; - bool is_end; + bool is_end = true; size_t total_vdevs_started = 0; int i; @@ -8185,7 +8185,9 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, } rcu_read_unlock(); - is_end = ((++ar->fw_stats.num_vdev_recvd) == total_vdevs_started); + if (total_vdevs_started) + is_end = ((++ar->fw_stats.num_vdev_recvd) == + total_vdevs_started); list_splice_tail_init(&stats->vdevs, &ar->fw_stats.vdevs); @@ -8204,7 +8206,9 @@ static void ath12k_wmi_fw_stats_process(struct ath12k *ar, /* Mark end until we reached the count of all started VDEVs * within the PDEV */ - is_end = ((++ar->fw_stats.num_bcn_recvd) == ar->num_started_vdevs); + if (ar->num_started_vdevs) + is_end = ((++ar->fw_stats.num_bcn_recvd) == + ar->num_started_vdevs); list_splice_tail_init(&stats->bcn, &ar->fw_stats.bcn); From 15d25307692312cec4b57052da73387f91a2e870 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 16 Jun 2025 21:12:05 +0300 Subject: [PATCH 389/497] wifi: carl9170: do not ping device which has failed to load firmware Syzkaller reports [1, 2] crashes caused by an attempts to ping the device which has failed to load firmware. Since such a device doesn't pass 'ieee80211_register_hw()', an internal workqueue managed by 'ieee80211_queue_work()' is not yet created and an attempt to queue work on it causes null-ptr-deref. [1] https://syzkaller.appspot.com/bug?extid=9a4aec827829942045ff [2] https://syzkaller.appspot.com/bug?extid=0d8afba53e8fb2633217 Fixes: e4a668c59080 ("carl9170: fix spurious restart due to high latency") Signed-off-by: Dmitry Antipov Acked-by: Christian Lamparter Link: https://patch.msgid.link/20250616181205.38883-1-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/carl9170/usb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index a3e03580cd9f..564ca6a61985 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -438,14 +438,21 @@ static void carl9170_usb_rx_complete(struct urb *urb) if (atomic_read(&ar->rx_anch_urbs) == 0) { /* - * The system is too slow to cope with - * the enormous workload. We have simply - * run out of active rx urbs and this - * unfortunately leads to an unpredictable - * device. + * At this point, either the system is too slow to + * cope with the enormous workload (so we have simply + * run out of active rx urbs and this unfortunately + * leads to an unpredictable device), or the device + * is not fully functional after an unsuccessful + * firmware loading attempts (so it doesn't pass + * ieee80211_register_hw() and there is no internal + * workqueue at all). */ - ieee80211_queue_work(ar->hw, &ar->ping_work); + if (ar->registered) + ieee80211_queue_work(ar->hw, &ar->ping_work); + else + pr_warn_once("device %s is not registered\n", + dev_name(&ar->udev->dev)); } } else { /* From cb6075bc62dc6a9cd7ab3572758685fdf78e3e20 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 6 Jun 2025 13:10:34 -0400 Subject: [PATCH 390/497] x86/mm: Fix early boot use of INVPLGB The INVLPGB instruction has limits on how many pages it can invalidate at once. That limit is enumerated in CPUID, read by the kernel, and stored in 'invpgb_count_max'. Ranged invalidation, like invlpgb_kernel_range_flush() break up their invalidations so that they do not exceed the limit. However, early boot code currently attempts to do ranged invalidation before populating 'invlpgb_count_max'. There is a for loop which is basically: for (...; addr < end; addr += invlpgb_count_max*PAGE_SIZE) If invlpgb_kernel_range_flush is called before the kernel has read the value of invlpgb_count_max from the hardware, the normally bounded loop can become an infinite loop if invlpgb_count_max is initialized to zero. Fix that issue by initializing invlpgb_count_max to 1. This way INVPLGB at early boot time will be a little bit slower than normal (with initialized invplgb_count_max), and not an instant hang at bootup time. Fixes: b7aa05cbdc52 ("x86/mm: Add INVLPGB support code") Signed-off-by: Rik van Riel Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20250606171112.4013261-3-riel%40surriel.com --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 93da466dfe2c..b2ad8d13211a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -31,7 +31,7 @@ #include "cpu.h" -u16 invlpgb_count_max __ro_after_init; +u16 invlpgb_count_max __ro_after_init = 1; static inline int rdmsrq_amd_safe(unsigned msr, u64 *p) { From 6dbb0d97c5096072c78a6abffe393584e57ae945 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 16 Jun 2025 13:15:12 -0700 Subject: [PATCH 391/497] mpls: Use rcu_dereference_rtnl() in mpls_route_input_rcu(). As syzbot reported [0], mpls_route_input_rcu() can be called from mpls_getroute(), where is under RTNL. net->mpls.platform_label is only updated under RTNL. Let's use rcu_dereference_rtnl() in mpls_route_input_rcu() to silence the splat. [0]: WARNING: suspicious RCU usage 6.15.0-rc7-syzkaller-00082-g5cdb2c77c4c3 #0 Not tainted ---------------------------- net/mpls/af_mpls.c:84 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by syz.2.4451/17730: #0: ffffffff9012a3e8 (rtnl_mutex){+.+.}-{4:4}, at: rtnl_lock net/core/rtnetlink.c:80 [inline] #0: ffffffff9012a3e8 (rtnl_mutex){+.+.}-{4:4}, at: rtnetlink_rcv_msg+0x371/0xe90 net/core/rtnetlink.c:6961 stack backtrace: CPU: 1 UID: 0 PID: 17730 Comm: syz.2.4451 Not tainted 6.15.0-rc7-syzkaller-00082-g5cdb2c77c4c3 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x16c/0x1f0 lib/dump_stack.c:120 lockdep_rcu_suspicious+0x166/0x260 kernel/locking/lockdep.c:6865 mpls_route_input_rcu+0x1d4/0x200 net/mpls/af_mpls.c:84 mpls_getroute+0x621/0x1ea0 net/mpls/af_mpls.c:2381 rtnetlink_rcv_msg+0x3c9/0xe90 net/core/rtnetlink.c:6964 netlink_rcv_skb+0x16d/0x440 net/netlink/af_netlink.c:2534 netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline] netlink_unicast+0x53a/0x7f0 net/netlink/af_netlink.c:1339 netlink_sendmsg+0x8d1/0xdd0 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg net/socket.c:727 [inline] ____sys_sendmsg+0xa98/0xc70 net/socket.c:2566 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2620 __sys_sendmmsg+0x200/0x420 net/socket.c:2709 __do_sys_sendmmsg net/socket.c:2736 [inline] __se_sys_sendmmsg net/socket.c:2733 [inline] __x64_sys_sendmmsg+0x9c/0x100 net/socket.c:2733 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x230 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f0a2818e969 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f0a28f52038 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f0a283b5fa0 RCX: 00007f0a2818e969 RDX: 0000000000000003 RSI: 0000200000000080 RDI: 0000000000000003 RBP: 00007f0a28210ab1 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 00007f0a283b5fa0 R15: 00007ffce5e9f268 Fixes: 0189197f4416 ("mpls: Basic routing support") Reported-by: syzbot+8a583bdd1a5cc0b0e068@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68507981.a70a0220.395abc.01ef.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250616201532.1036568-1-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- net/mpls/af_mpls.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d536c97144e9..47d7dfd9ad09 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -81,8 +81,8 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) if (index < net->mpls.platform_labels) { struct mpls_route __rcu **platform_label = - rcu_dereference(net->mpls.platform_label); - rt = rcu_dereference(platform_label[index]); + rcu_dereference_rtnl(net->mpls.platform_label); + rt = rcu_dereference_rtnl(platform_label[index]); } return rt; } From 2f370ae1fb6317985f3497b1bb80d457508ca2f7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 16 Jun 2025 11:21:14 -0700 Subject: [PATCH 392/497] atm: atmtcp: Free invalid length skb in atmtcp_c_send(). syzbot reported the splat below. [0] vcc_sendmsg() copies data passed from userspace to skb and passes it to vcc->dev->ops->send(). atmtcp_c_send() accesses skb->data as struct atmtcp_hdr after checking if skb->len is 0, but it's not enough. Also, when skb->len == 0, skb and sk (vcc) were leaked because dev_kfree_skb() is not called and sk_wmem_alloc adjustment is missing to revert atm_account_tx() in vcc_sendmsg(), which is expected to be done in atm_pop_raw(). Let's properly free skb with an invalid length in atmtcp_c_send(). [0]: BUG: KMSAN: uninit-value in atmtcp_c_send+0x255/0xed0 drivers/atm/atmtcp.c:294 atmtcp_c_send+0x255/0xed0 drivers/atm/atmtcp.c:294 vcc_sendmsg+0xd7c/0xff0 net/atm/common.c:644 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x330/0x3d0 net/socket.c:727 ____sys_sendmsg+0x7e0/0xd80 net/socket.c:2566 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2620 __sys_sendmsg net/socket.c:2652 [inline] __do_sys_sendmsg net/socket.c:2657 [inline] __se_sys_sendmsg net/socket.c:2655 [inline] __x64_sys_sendmsg+0x211/0x3e0 net/socket.c:2655 x64_sys_call+0x32fb/0x3db0 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xd9/0x210 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: slab_post_alloc_hook mm/slub.c:4154 [inline] slab_alloc_node mm/slub.c:4197 [inline] kmem_cache_alloc_node_noprof+0x818/0xf00 mm/slub.c:4249 kmalloc_reserve+0x13c/0x4b0 net/core/skbuff.c:579 __alloc_skb+0x347/0x7d0 net/core/skbuff.c:670 alloc_skb include/linux/skbuff.h:1336 [inline] vcc_sendmsg+0xb40/0xff0 net/atm/common.c:628 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x330/0x3d0 net/socket.c:727 ____sys_sendmsg+0x7e0/0xd80 net/socket.c:2566 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2620 __sys_sendmsg net/socket.c:2652 [inline] __do_sys_sendmsg net/socket.c:2657 [inline] __se_sys_sendmsg net/socket.c:2655 [inline] __x64_sys_sendmsg+0x211/0x3e0 net/socket.c:2655 x64_sys_call+0x32fb/0x3db0 arch/x86/include/generated/asm/syscalls_64.h:47 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xd9/0x210 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f CPU: 1 UID: 0 PID: 5798 Comm: syz-executor192 Not tainted 6.16.0-rc1-syzkaller-00010-g2c4a1f3fe03e #0 PREEMPT(undef) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+1d3c235276f62963e93a@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=1d3c235276f62963e93a Tested-by: syzbot+1d3c235276f62963e93a@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250616182147.963333-2-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- drivers/atm/atmtcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index d4aa0f353b6c..eeae160c898d 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -288,7 +288,9 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) struct sk_buff *new_skb; int result = 0; - if (!skb->len) return 0; + if (skb->len < sizeof(struct atmtcp_hdr)) + goto done; + dev = vcc->dev_data; hdr = (struct atmtcp_hdr *) skb->data; if (hdr->length == ATMTCP_HDR_MAGIC) { From 7851263998d4269125fd6cb3fdbfc7c6db853859 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 16 Jun 2025 11:21:15 -0700 Subject: [PATCH 393/497] atm: Revert atm_account_tx() if copy_from_iter_full() fails. In vcc_sendmsg(), we account skb->truesize to sk->sk_wmem_alloc by atm_account_tx(). It is expected to be reverted by atm_pop_raw() later called by vcc->dev->ops->send(vcc, skb). However, vcc_sendmsg() misses the same revert when copy_from_iter_full() fails, and then we will leak a socket. Let's factorise the revert part as atm_return_tx() and call it in the failure path. Note that the corresponding sk_wmem_alloc operation can be found in alloc_tx() as of the blamed commit. $ git blame -L:alloc_tx net/atm/common.c c55fa3cccbc2c~ Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Simon Horman Closes: https://lore.kernel.org/netdev/20250614161959.GR414686@horms.kernel.org/ Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250616182147.963333-3-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/atmdev.h | 6 ++++++ net/atm/common.c | 1 + net/atm/raw.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 9b02961d65ee..45f2f278b50a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -249,6 +249,12 @@ static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) ATM_SKB(skb)->atm_options = vcc->atm_options; } +static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, + &sk_atm(vcc)->sk_wmem_alloc)); +} + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); diff --git a/net/atm/common.c b/net/atm/common.c index 9b75699992ff..d7f7976ea13a 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) skb->dev = NULL; /* for paths shared with net_device interfaces */ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { + atm_return_tx(vcc, skb); kfree_skb(skb); error = -EFAULT; goto out; diff --git a/net/atm/raw.c b/net/atm/raw.c index 2b5f78a7ec3e..1e6511ec842c 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -36,7 +36,7 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) pr_debug("(%d) %d -= %d\n", vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); - WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); + atm_return_tx(vcc, skb); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } From 905eeb2b7c33adda23a966aeb811ab4cb9e62031 Mon Sep 17 00:00:00 2001 From: Tatsuyuki Ishi Date: Thu, 12 Jun 2025 19:18:25 +0900 Subject: [PATCH 394/497] erofs: impersonate the opener's credentials when accessing backing file Previously, file operations on a file-backed mount used the current process' credentials to access the backing FD. Attempting to do so on Android lead to SELinux denials, as ACL rules on the backing file (e.g. /system/apex/foo.apex) is restricted to a small set of process. Arguably, this error is redundant and leaking implementation details, as access to files on a mount is already ACL'ed by path. Instead, override to use the opener's cred when accessing the backing file. This makes the behavior similar to a loop-backed mount, which uses kworker cred when accessing the backing file and does not cause SELinux denials. Signed-off-by: Tatsuyuki Ishi Reviewed-by: Gao Xiang Reviewed-by: Hongbo Li Link: https://lore.kernel.org/r/20250612-b4-erofs-impersonate-v1-1-8ea7d6f65171@google.com Signed-off-by: Gao Xiang --- fs/erofs/fileio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 7d81f504bff0..df5cc63f2c01 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -47,6 +47,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) { + const struct cred *old_cred; struct iov_iter iter; int ret; @@ -60,7 +61,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) rq->iocb.ki_flags = IOCB_DIRECT; iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, rq->bio.bi_iter.bi_size); + old_cred = override_creds(rq->iocb.ki_filp->f_cred); ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); + revert_creds(old_cred); if (ret != -EIOCBQUEUED) erofs_fileio_ki_complete(&rq->iocb, ret); } From 30b58444807c93bffeaba7d776110f2a909d2f9a Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 17 Jun 2025 13:40:56 +0800 Subject: [PATCH 395/497] erofs: remove unused trace event erofs_destroy_inode The trace event `erofs_destroy_inode` was added but remains unused. This unused event contributes approximately 5KB to the kernel module size. Reported-by: Steven Rostedt Closes: https://lore.kernel.org/r/20250612224906.15000244@batman.local.home Fixes: 13f06f48f7bf ("staging: erofs: support tracepoint") Cc: stable@vger.kernel.org Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250617054056.3232365-1-hsiangkao@linux.alibaba.com --- include/trace/events/erofs.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index a5f4b9234f46..dad7360f42f9 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -211,24 +211,6 @@ TRACE_EVENT(erofs_map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -TRACE_EVENT(erofs_destroy_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( erofs_nid_t, nid ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->nid = EROFS_I(inode)->nid; - ), - - TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry)) -); - #endif /* _TRACE_EROFS_H */ /* This part must be outside protection */ From e7417421d89358da071fd2930f91e67c7128fbff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Jun 2025 11:45:29 +0200 Subject: [PATCH 396/497] wifi: ath6kl: remove WARN on bad firmware input If the firmware gives bad input, that's nothing to do with the driver's stack at this point etc., so the WARN_ON() doesn't add any value. Additionally, this is one of the top syzbot reports now. Just print a message, and as an added bonus, print the sizes too. Reported-by: syzbot+92c6dd14aaa230be6855@syzkaller.appspotmail.com Tested-by: syzbot+92c6dd14aaa230be6855@syzkaller.appspotmail.com Acked-by: Jeff Johnson Link: https://patch.msgid.link/20250617114529.031a677a348e.I58bf1eb4ac16a82c546725ff010f3f0d2b0cca49@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/bmi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath6kl/bmi.c b/drivers/net/wireless/ath/ath6kl/bmi.c index af98e871199d..5a9e93fd1ef4 100644 --- a/drivers/net/wireless/ath/ath6kl/bmi.c +++ b/drivers/net/wireless/ath/ath6kl/bmi.c @@ -87,7 +87,9 @@ int ath6kl_bmi_get_target_info(struct ath6kl *ar, * We need to do some backwards compatibility to make this work. */ if (le32_to_cpu(targ_info->byte_count) != sizeof(*targ_info)) { - WARN_ON(1); + ath6kl_err("mismatched byte count %d vs. expected %zd\n", + le32_to_cpu(targ_info->byte_count), + sizeof(*targ_info)); return -EINVAL; } From db5957ab85204a02093ac5ab2d0bbfe253955b71 Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Tue, 17 Jun 2025 15:00:06 +0300 Subject: [PATCH 397/497] wifi: iwlwifi: restore missing initialization of async_handlers_list (again) The initialization of async_handlers_list was accidentally removed in a previous change. Then it was restoted by commit 175e69e33c66 ("wifi: iwlwifi: restore missing initialization of async_handlers_list"). Somehow, the initialization disappeared again. Restote it. Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/mld.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mld.c b/drivers/net/wireless/intel/iwlwifi/mld/mld.c index e8820e7cf8fa..1774bb84dd3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mld.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mld.c @@ -77,6 +77,7 @@ void iwl_construct_mld(struct iwl_mld *mld, struct iwl_trans *trans, /* Setup async RX handling */ spin_lock_init(&mld->async_handlers_lock); + INIT_LIST_HEAD(&mld->async_handlers_list); wiphy_work_init(&mld->async_handlers_wk, iwl_mld_async_handlers_wk); From d5352b491a3a2628f1a798952a4ae76bde5d42e4 Mon Sep 17 00:00:00 2001 From: Pei Xiao Date: Tue, 27 May 2025 16:03:55 +0800 Subject: [PATCH 398/497] wifi: iwlwifi: cfg: Limit cb_size to valid range on arm64 defconfig build failed with gcc-8: drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c:208:3: include/linux/bitfield.h:195:3: error: call to '__field_overflow' declared with attribute error: value doesn't fit into mask __field_overflow(); \ ^~~~~~~~~~~~~~~~~~ include/linux/bitfield.h:215:2: note: in expansion of macro '____MAKE_OP' ____MAKE_OP(u##size,u##size,,) ^~~~~~~~~~~ include/linux/bitfield.h:218:1: note: in expansion of macro '__MAKE_OP' __MAKE_OP(32) Limit cb_size to valid range to fix it. Signed-off-by: Pei Xiao Link: https://patch.msgid.link/7b373a4426070d50b5afb3269fd116c18ce3aea8.1748332709.git.xiaopei01@kylinos.cn Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c index cb36baac14da..4f2be0c1bd97 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c @@ -166,7 +166,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_context_info *ctxt_info; struct iwl_context_info_rbd_cfg *rx_cfg; - u32 control_flags = 0, rb_size; + u32 control_flags = 0, rb_size, cb_size; dma_addr_t phys; int ret; @@ -202,11 +202,12 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans, rb_size = IWL_CTXT_INFO_RB_SIZE_4K; } - WARN_ON(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)) > 12); + cb_size = RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)); + if (WARN_ON(cb_size > 12)) + cb_size = 12; + control_flags = IWL_CTXT_INFO_TFD_FORMAT_LONG; - control_flags |= - u32_encode_bits(RX_QUEUE_CB_SIZE(iwl_trans_get_num_rbds(trans)), - IWL_CTXT_INFO_RB_CB_SIZE); + control_flags |= u32_encode_bits(cb_size, IWL_CTXT_INFO_RB_CB_SIZE); control_flags |= u32_encode_bits(rb_size, IWL_CTXT_INFO_RB_SIZE); ctxt_info->control.control_flags = cpu_to_le32(control_flags); From 432a41232ca932ecb2330f46105e68e296ba8c7f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 16 Jun 2025 13:48:36 +0200 Subject: [PATCH 399/497] wifi: iwlwifi: dvm: restore n_no_reclaim_cmds setting Apparently I accidentally removed this setting in my transport configuration rework, leading to an endless stream of warnings from the PCIe code when relevant notifications are received by the driver from firmware. Restore it. Reported-by: Woody Suwalski Closes: https://lore.kernel.org/r/e8c45d32-6129-8a5e-af80-ccc42aaf200b@gmail.com/ Fixes: 08e77d5edf70 ("wifi: iwlwifi: rework transport configuration") Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250616134902.222342908ca4.I47a551c86cbc0e9de4f980ca2fd0d67bf4052e50@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/dvm/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index dbfd45948e8b..66211426aa3a 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1316,6 +1316,7 @@ static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans, sizeof(trans->conf.no_reclaim_cmds)); memcpy(trans->conf.no_reclaim_cmds, no_reclaim_cmds, sizeof(no_reclaim_cmds)); + trans->conf.n_no_reclaim_cmds = ARRAY_SIZE(no_reclaim_cmds); switch (iwlwifi_mod_params.amsdu_size) { case IWL_AMSDU_DEF: From 83f3ac2848b46e3e5af5d06b5f176c17e35733a3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 22 May 2025 13:17:03 +0100 Subject: [PATCH 400/497] wifi: iwlwifi: Fix incorrect logic on cmd_ver range checking The current cmd_ver range checking on cmd_ver < 1 && cmd_ver > 3 can never be true because the logical operator && is being used, cmd_ver can never be less than 1 and also greater than 3. Fix this by using the logical || operator. Fixes: df6146a0296e ("wifi: iwlwifi: Add a new version for mac config command") Signed-off-by: Colin Ian King Link: https://patch.msgid.link/20250522121703.2766764-1-colin.i.king@gmail.com Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c index 81ca9ff67be9..3c255ae916c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac.c @@ -34,7 +34,7 @@ static void iwl_mvm_mld_mac_ctxt_cmd_common(struct iwl_mvm *mvm, WIDE_ID(MAC_CONF_GROUP, MAC_CONFIG_CMD), 0); - if (WARN_ON(cmd_ver < 1 && cmd_ver > 3)) + if (WARN_ON(cmd_ver < 1 || cmd_ver > 3)) return; cmd->id_and_color = cpu_to_le32(mvmvif->id); From d0fa59897e049e84432600e86df82aab3dce7aa5 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 13 Jun 2025 15:30:56 -0400 Subject: [PATCH 401/497] tcp: fix tcp_packet_delayed() for tcp_is_non_sack_preventing_reopen() behavior After the following commit from 2024: commit e37ab7373696 ("tcp: fix to allow timestamp undo if no retransmits were sent") ...there was buggy behavior where TCP connections without SACK support could easily see erroneous undo events at the end of fast recovery or RTO recovery episodes. The erroneous undo events could cause those connections to suffer repeated loss recovery episodes and high retransmit rates. The problem was an interaction between the non-SACK behavior on these connections and the undo logic. The problem is that, for non-SACK connections at the end of a loss recovery episode, if snd_una == high_seq, then tcp_is_non_sack_preventing_reopen() holds steady in CA_Recovery or CA_Loss, but clears tp->retrans_stamp to 0. Then upon the next ACK the "tcp: fix to allow timestamp undo if no retransmits were sent" logic saw the tp->retrans_stamp at 0 and erroneously concluded that no data was retransmitted, and erroneously performed an undo of the cwnd reduction, restoring cwnd immediately to the value it had before loss recovery. This caused an immediate burst of traffic and build-up of queues and likely another immediate loss recovery episode. This commit fixes tcp_packet_delayed() to ignore zero retrans_stamp values for non-SACK connections when snd_una is at or above high_seq, because tcp_is_non_sack_preventing_reopen() clears retrans_stamp in this case, so it's not a valid signal that we can undo. Note that the commit named in the Fixes footer restored long-present behavior from roughly 2005-2019, so apparently this bug was present for a while during that era, and this was simply not caught. Fixes: e37ab7373696 ("tcp: fix to allow timestamp undo if no retransmits were sent") Reported-by: Eric Wheeler Closes: https://lore.kernel.org/netdev/64ea9333-e7f9-0df-b0f2-8d566143acab@ewheeler.net/ Signed-off-by: Neal Cardwell Co-developed-by: Yuchung Cheng Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ec92dec321a..12c2e6fc85c6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2479,20 +2479,33 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { const struct sock *sk = (const struct sock *)tp; - if (tp->retrans_stamp && - tcp_tsopt_ecr_before(tp, tp->retrans_stamp)) - return true; /* got echoed TS before first retransmission */ + /* Received an echoed timestamp before the first retransmission? */ + if (tp->retrans_stamp) + return tcp_tsopt_ecr_before(tp, tp->retrans_stamp); - /* Check if nothing was retransmitted (retrans_stamp==0), which may - * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp - * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear - * retrans_stamp even if we had retransmitted the SYN. + /* We set tp->retrans_stamp upon the first retransmission of a loss + * recovery episode, so normally if tp->retrans_stamp is 0 then no + * retransmission has happened yet (likely due to TSQ, which can cause + * fast retransmits to be delayed). So if snd_una advanced while + * (tp->retrans_stamp is 0 then apparently a packet was merely delayed, + * not lost. But there are exceptions where we retransmit but then + * clear tp->retrans_stamp, so we check for those exceptions. */ - if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */ - sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */ - return true; /* nothing was retransmitted */ - return false; + /* (1) For non-SACK connections, tcp_is_non_sack_preventing_reopen() + * clears tp->retrans_stamp when snd_una == high_seq. + */ + if (!tcp_is_sack(tp) && !before(tp->snd_una, tp->high_seq)) + return false; + + /* (2) In TCP_SYN_SENT tcp_clean_rtx_queue() clears tp->retrans_stamp + * when setting FLAG_SYN_ACKED is set, even if the SYN was + * retransmitted. + */ + if (sk->sk_state == TCP_SYN_SENT) + return false; + + return true; /* tp->retrans_stamp is zero; no retransmit yet */ } /* Undo procedures. */ From df29f60369ccec0aa17d7eed7e2ae1fcdc9be6d4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 Jun 2025 12:09:17 +0800 Subject: [PATCH 402/497] crypto: ahash - Fix infinite recursion in ahash_def_finup Invoke the final function directly in the default finup implementation since crypto_ahash_final is now just a wrapper around finup. Reported-by: Eric Biggers Fixes: 9d7a0ab1c753 ("crypto: ahash - Handle partial blocks in API") Signed-off-by: Herbert Xu --- crypto/ahash.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index e10bc2659ae4..bc84a07c924c 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -600,12 +600,14 @@ static void ahash_def_finup_done2(void *data, int err) static int ahash_def_finup_finish1(struct ahash_request *req, int err) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + if (err) goto out; req->base.complete = ahash_def_finup_done2; - err = crypto_ahash_final(req); + err = crypto_ahash_alg(tfm)->final(req); if (err == -EINPROGRESS || err == -EBUSY) return err; From 635e118317ffa773f6d25ec6a71b7927d7e8886a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:54:44 +0200 Subject: [PATCH 403/497] Revert "mtd: core: always create master device" The idea behind this patch was to always let a "master" mtd device available to anchor runtime PM. Historically, there was no mtd device representing the whole storage as soon as partitions were coming into play. The introduction of CONFIG_MTD_PARTITIONED_MASTER allowed to keep this "master" device, but was not enabled by default to avoid breaking existing users (otherwise the mtd device numbering would be totally messed up with an off by 1, at least). The approach of adding an mtd_master class on top of partitioned mtd devices is breaking the mtd core in many creative ways, so better think again this approach and revert the faulty changes for now. This reverts commit 0aa7b390fc40a871267a2328bbbefca8b37ad307. Fixes: 0aa7b390fc40 ("mtd: core: always create master device") Tested-by: Guenter Roeck Acked-by: Guenter Roeck Signed-off-by: Miquel Raynal --- drivers/mtd/mtdchar.c | 2 +- drivers/mtd/mtdcore.c | 152 +++++++++------------------------ drivers/mtd/mtdcore.h | 2 +- drivers/mtd/mtdpart.c | 16 ++-- include/linux/mtd/partitions.h | 2 +- 5 files changed, 51 insertions(+), 123 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 391d81ad960c..8dc4f5c493fc 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -559,7 +559,7 @@ static int mtdchar_blkpg_ioctl(struct mtd_info *mtd, /* Sanitize user input */ p.devname[BLKPG_DEVNAMELTH - 1] = '\0'; - return mtd_add_partition(mtd, p.devname, p.start, p.length, NULL); + return mtd_add_partition(mtd, p.devname, p.start, p.length); case BLKPG_DEL_PARTITION: diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 429d8c16baf0..5ba9a741f5ac 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -68,13 +68,7 @@ static struct class mtd_class = { .pm = MTD_CLS_PM_OPS, }; -static struct class mtd_master_class = { - .name = "mtd_master", - .pm = MTD_CLS_PM_OPS, -}; - static DEFINE_IDR(mtd_idr); -static DEFINE_IDR(mtd_master_idr); /* These are exported solely for the purpose of mtd_blkdevs.c. You should not use them for _anything_ else */ @@ -89,9 +83,8 @@ EXPORT_SYMBOL_GPL(__mtd_next_device); static LIST_HEAD(mtd_notifiers); -#define MTD_MASTER_DEVS 255 + #define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2) -static dev_t mtd_master_devt; /* REVISIT once MTD uses the driver model better, whoever allocates * the mtd_info will probably want to use the release() hook... @@ -111,17 +104,6 @@ static void mtd_release(struct device *dev) device_destroy(&mtd_class, index + 1); } -static void mtd_master_release(struct device *dev) -{ - struct mtd_info *mtd = dev_get_drvdata(dev); - - idr_remove(&mtd_master_idr, mtd->index); - of_node_put(mtd_get_of_node(mtd)); - - if (mtd_is_partition(mtd)) - release_mtd_partition(mtd); -} - static void mtd_device_release(struct kref *kref) { struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt); @@ -385,11 +367,6 @@ static const struct device_type mtd_devtype = { .release = mtd_release, }; -static const struct device_type mtd_master_devtype = { - .name = "mtd_master", - .release = mtd_master_release, -}; - static bool mtd_expert_analysis_mode; #ifdef CONFIG_DEBUG_FS @@ -657,13 +634,13 @@ static void mtd_check_of_node(struct mtd_info *mtd) /** * add_mtd_device - register an MTD device * @mtd: pointer to new MTD device info structure - * @partitioned: create partitioned device * * Add a device to the list of MTD devices present in the system, and * notify each currently active MTD 'user' of its arrival. Returns * zero on success or non-zero on failure. */ -int add_mtd_device(struct mtd_info *mtd, bool partitioned) + +int add_mtd_device(struct mtd_info *mtd) { struct device_node *np = mtd_get_of_node(mtd); struct mtd_info *master = mtd_get_master(mtd); @@ -710,17 +687,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) ofidx = -1; if (np) ofidx = of_alias_get_id(np, "mtd"); - if (partitioned) { - if (ofidx >= 0) - i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); - else - i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); - } else { - if (ofidx >= 0) - i = idr_alloc(&mtd_master_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); - else - i = idr_alloc(&mtd_master_idr, mtd, 0, 0, GFP_KERNEL); - } + if (ofidx >= 0) + i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); + else + i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); if (i < 0) { error = i; goto fail_locked; @@ -768,18 +738,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) /* Caller should have set dev.parent to match the * physical device, if appropriate. */ - if (partitioned) { - mtd->dev.type = &mtd_devtype; - mtd->dev.class = &mtd_class; - mtd->dev.devt = MTD_DEVT(i); - dev_set_name(&mtd->dev, "mtd%d", i); - error = dev_set_name(&mtd->dev, "mtd%d", i); - } else { - mtd->dev.type = &mtd_master_devtype; - mtd->dev.class = &mtd_master_class; - mtd->dev.devt = MKDEV(MAJOR(mtd_master_devt), i); - error = dev_set_name(&mtd->dev, "mtd_master%d", i); - } + mtd->dev.type = &mtd_devtype; + mtd->dev.class = &mtd_class; + mtd->dev.devt = MTD_DEVT(i); + error = dev_set_name(&mtd->dev, "mtd%d", i); if (error) goto fail_devname; dev_set_drvdata(&mtd->dev, mtd); @@ -787,7 +749,6 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) of_node_get(mtd_get_of_node(mtd)); error = device_register(&mtd->dev); if (error) { - pr_err("mtd: %s device_register fail %d\n", mtd->name, error); put_device(&mtd->dev); goto fail_added; } @@ -799,13 +760,10 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) mtd_debugfs_populate(mtd); - if (partitioned) { - device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, - "mtd%dro", i); - } + device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, + "mtd%dro", i); - pr_debug("mtd: Giving out %spartitioned device %d to %s\n", - partitioned ? "" : "un-", i, mtd->name); + pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) @@ -813,16 +771,13 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) mutex_unlock(&mtd_table_mutex); - if (partitioned) { - if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { - if (IS_BUILTIN(CONFIG_MTD)) { - pr_info("mtd: setting mtd%d (%s) as root device\n", - mtd->index, mtd->name); - ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); - } else { - pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", - mtd->index, mtd->name); - } + if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { + if (IS_BUILTIN(CONFIG_MTD)) { + pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name); + ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); + } else { + pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", + mtd->index, mtd->name); } } @@ -838,10 +793,7 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) fail_added: of_node_put(mtd_get_of_node(mtd)); fail_devname: - if (partitioned) - idr_remove(&mtd_idr, i); - else - idr_remove(&mtd_master_idr, i); + idr_remove(&mtd_idr, i); fail_locked: mutex_unlock(&mtd_table_mutex); return error; @@ -859,14 +811,12 @@ int add_mtd_device(struct mtd_info *mtd, bool partitioned) int del_mtd_device(struct mtd_info *mtd) { - struct mtd_notifier *not; - struct idr *idr; int ret; + struct mtd_notifier *not; mutex_lock(&mtd_table_mutex); - idr = mtd->dev.class == &mtd_class ? &mtd_idr : &mtd_master_idr; - if (idr_find(idr, mtd->index) != mtd) { + if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; } @@ -1106,7 +1056,6 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, const struct mtd_partition *parts, int nr_parts) { - struct mtd_info *parent; int ret, err; mtd_set_dev_defaults(mtd); @@ -1115,30 +1064,25 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, if (ret) goto out; - ret = add_mtd_device(mtd, false); - if (ret) - goto out; - if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) { - ret = mtd_add_partition(mtd, mtd->name, 0, MTDPART_SIZ_FULL, &parent); + ret = add_mtd_device(mtd); if (ret) goto out; - - } else { - parent = mtd; } /* Prefer parsed partitions over driver-provided fallback */ - ret = parse_mtd_partitions(parent, types, parser_data); + ret = parse_mtd_partitions(mtd, types, parser_data); if (ret == -EPROBE_DEFER) goto out; if (ret > 0) ret = 0; else if (nr_parts) - ret = add_mtd_partitions(parent, parts, nr_parts); - else if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) - ret = mtd_add_partition(parent, mtd->name, 0, MTDPART_SIZ_FULL, NULL); + ret = add_mtd_partitions(mtd, parts, nr_parts); + else if (!device_is_registered(&mtd->dev)) + ret = add_mtd_device(mtd); + else + ret = 0; if (ret) goto out; @@ -1158,14 +1102,13 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, register_reboot_notifier(&mtd->reboot_notifier); } - return 0; out: - nvmem_unregister(mtd->otp_user_nvmem); - nvmem_unregister(mtd->otp_factory_nvmem); + if (ret) { + nvmem_unregister(mtd->otp_user_nvmem); + nvmem_unregister(mtd->otp_factory_nvmem); + } - del_mtd_partitions(mtd); - - if (device_is_registered(&mtd->dev)) { + if (ret && device_is_registered(&mtd->dev)) { err = del_mtd_device(mtd); if (err) pr_err("Error when deleting MTD device (%d)\n", err); @@ -1324,7 +1267,8 @@ int __get_mtd_device(struct mtd_info *mtd) mtd = mtd->parent; } - kref_get(&master->refcnt); + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) + kref_get(&master->refcnt); return 0; } @@ -1418,7 +1362,8 @@ void __put_mtd_device(struct mtd_info *mtd) mtd = parent; } - kref_put(&master->refcnt, mtd_device_release); + if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) + kref_put(&master->refcnt, mtd_device_release); module_put(master->owner); @@ -2585,16 +2530,6 @@ static int __init init_mtd(void) if (ret) goto err_reg; - ret = class_register(&mtd_master_class); - if (ret) - goto err_reg2; - - ret = alloc_chrdev_region(&mtd_master_devt, 0, MTD_MASTER_DEVS, "mtd_master"); - if (ret < 0) { - pr_err("unable to allocate char dev region\n"); - goto err_chrdev; - } - mtd_bdi = mtd_bdi_init("mtd"); if (IS_ERR(mtd_bdi)) { ret = PTR_ERR(mtd_bdi); @@ -2619,10 +2554,6 @@ static int __init init_mtd(void) bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); err_bdi: - unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS); -err_chrdev: - class_unregister(&mtd_master_class); -err_reg2: class_unregister(&mtd_class); err_reg: pr_err("Error registering mtd class or bdi: %d\n", ret); @@ -2636,12 +2567,9 @@ static void __exit cleanup_mtd(void) if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); - class_unregister(&mtd_master_class); - unregister_chrdev_region(mtd_master_devt, MTD_MASTER_DEVS); bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); idr_destroy(&mtd_idr); - idr_destroy(&mtd_master_idr); } module_init(init_mtd); diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h index 2258d31c5aa6..b014861a06a6 100644 --- a/drivers/mtd/mtdcore.h +++ b/drivers/mtd/mtdcore.h @@ -8,7 +8,7 @@ extern struct mutex mtd_table_mutex; extern struct backing_dev_info *mtd_bdi; struct mtd_info *__mtd_next_device(int i); -int __must_check add_mtd_device(struct mtd_info *mtd, bool partitioned); +int __must_check add_mtd_device(struct mtd_info *mtd); int del_mtd_device(struct mtd_info *mtd); int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); int del_mtd_partitions(struct mtd_info *); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 5a3db36d734e..994e8c51e674 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -86,7 +86,8 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, * parent conditional on that option. Note, this is a way to * distinguish between the parent and its partitions in sysfs. */ - child->dev.parent = &parent->dev; + child->dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) || mtd_is_partition(parent) ? + &parent->dev : parent->dev.parent; child->dev.of_node = part->of_node; child->parent = parent; child->part.offset = part->offset; @@ -242,7 +243,7 @@ static int mtd_add_partition_attrs(struct mtd_info *new) } int mtd_add_partition(struct mtd_info *parent, const char *name, - long long offset, long long length, struct mtd_info **out) + long long offset, long long length) { struct mtd_info *master = mtd_get_master(parent); u64 parent_size = mtd_is_partition(parent) ? @@ -275,15 +276,12 @@ int mtd_add_partition(struct mtd_info *parent, const char *name, list_add_tail(&child->part.node, &parent->partitions); mutex_unlock(&master->master.partitions_lock); - ret = add_mtd_device(child, true); + ret = add_mtd_device(child); if (ret) goto err_remove_part; mtd_add_partition_attrs(child); - if (out) - *out = child; - return 0; err_remove_part: @@ -415,7 +413,7 @@ int add_mtd_partitions(struct mtd_info *parent, list_add_tail(&child->part.node, &parent->partitions); mutex_unlock(&master->master.partitions_lock); - ret = add_mtd_device(child, true); + ret = add_mtd_device(child); if (ret) { mutex_lock(&master->master.partitions_lock); list_del(&child->part.node); @@ -592,6 +590,9 @@ static int mtd_part_of_parse(struct mtd_info *master, int ret, err = 0; dev = &master->dev; + /* Use parent device (controller) if the top level MTD is not registered */ + if (!IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) && !mtd_is_partition(master)) + dev = master->dev.parent; np = mtd_get_of_node(master); if (mtd_is_partition(master)) @@ -710,7 +711,6 @@ int parse_mtd_partitions(struct mtd_info *master, const char *const *types, if (ret < 0 && !err) err = ret; } - return err; } diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index 5daf80df9e89..b74a539ec581 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -108,7 +108,7 @@ extern void deregister_mtd_parser(struct mtd_part_parser *parser); deregister_mtd_parser) int mtd_add_partition(struct mtd_info *master, const char *name, - long long offset, long long length, struct mtd_info **part); + long long offset, long long length); int mtd_del_partition(struct mtd_info *master, int partno); uint64_t mtd_get_device_size(const struct mtd_info *mtd); From 60dffe96fab00cee7ef7f1b151da485d42ccb33a Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:47:58 +0200 Subject: [PATCH 404/497] mtd: spinand: winbond: Fix W35N number of planes/LUN There's been a mistake when extracting the geometry of the W35N02 and W35N04 chips from the datasheet. There is a single plane, however there are respectively 2 and 4 LUNs. They are actually referred in the datasheet as dies (equivalent of target), but as there is no die select operation and the chips only feature a single configuration register for the entire chip (instead of one per die), we can reasonably assume we are talking about LUNs and not dies. Reported-by: Andreas Dannenberg Suggested-by: Vignesh Raghavendra Fixes: 25e08bf66660 ("mtd: spinand: winbond: Add support for W35N02JW and W35N04JW chips") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/winbond.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 19f8dd4a6370..2808bbd7a16e 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -289,7 +289,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)), SPINAND_INFO("W35N02JW", /* 1.8V */ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x22), - NAND_MEMORG(1, 4096, 128, 64, 512, 10, 2, 1, 1), + NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 2, 1), NAND_ECCREQ(1, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, @@ -298,7 +298,7 @@ static const struct spinand_info winbond_spinand_table[] = { SPINAND_ECCINFO(&w35n01jw_ooblayout, NULL)), SPINAND_INFO("W35N04JW", /* 1.8V */ SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0xdf, 0x23), - NAND_MEMORG(1, 4096, 128, 64, 512, 10, 4, 1, 1), + NAND_MEMORG(1, 4096, 128, 64, 512, 10, 1, 4, 1), NAND_ECCREQ(1, 512), SPINAND_INFO_OP_VARIANTS(&read_cache_octal_variants, &write_cache_octal_variants, From 29384bbb1a2a5926f97b8cbe469081e7a1770dea Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:47:59 +0200 Subject: [PATCH 405/497] mtd: spinand: winbond: Increase maximum frequency on an octal operation The default number of dummy cycles is 16 in octal I/O mode (1S-8S-8S), and with this default configuration the maximum frequency is higher than what is being advertised. There are higher and lower frequency possibilities, which involve making changes in the number of dummy cycles through the VCR register. At this stage, let's just describe the default configuration correctly. There should be no functional change. Fixes: 1ac5ff2f2ad6 ("mtd: spinand: winbond: Add octal support") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/winbond.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 2808bbd7a16e..0756966b4e3c 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -25,7 +25,7 @@ static SPINAND_OP_VARIANTS(read_cache_octal_variants, SPINAND_PAGE_READ_FROM_CACHE_1S_1D_8D_OP(0, 2, NULL, 0, 105 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 86 * HZ_PER_MHZ), + SPINAND_PAGE_READ_FROM_CACHE_1S_8S_8S_OP(0, 16, NULL, 0, 162 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_8S_OP(0, 1, NULL, 0, 133 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_1S_OP(0, 1, NULL, 0)); From dba90f5a79c13936de4273a19e67908a0c296afe Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 18 Jun 2025 10:48:00 +0200 Subject: [PATCH 406/497] mtd: spinand: winbond: Prevent unsupported frequencies on dual/quad I/O variants Dual and quad capable chips natively support dual and quad I/O variants at up to 104MHz (1-2-2 and 1-4-4 operations). Reaching the maximum speed of 166MHz is theoretically possible (while still unsupported in the field) by adding a few more dummy cycles. Let's be accurate and clearly state this limit. Setting a maximum frequency implies adding the frequency parameter to the macro, which is done using a variadic argument to avoid impacting all the other drivers which already make use of this macro. Fixes: 1ea808b4d15b ("mtd: spinand: winbond: Update the *JW chip definitions") Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/winbond.c | 4 ++-- include/linux/mtd/spinand.h | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c index 0756966b4e3c..b7a28f001a38 100644 --- a/drivers/mtd/nand/spi/winbond.c +++ b/drivers/mtd/nand/spi/winbond.c @@ -42,11 +42,11 @@ static SPINAND_OP_VARIANTS(update_cache_octal_variants, static SPINAND_OP_VARIANTS(read_cache_dual_quad_dtr_variants, SPINAND_PAGE_READ_FROM_CACHE_1S_4D_4D_OP(0, 8, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_4D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(0, 2, NULL, 0, 104 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_4S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_2D_2D_OP(0, 4, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_2D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), - SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(0, 1, NULL, 0, 104 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_1S_1S_2S_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_1S_1D_1D_OP(0, 2, NULL, 0, 80 * HZ_PER_MHZ), SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(0, 1, NULL, 0), diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 811a0f356315..15eaa09da998 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -113,11 +113,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 2), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_2S_2S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ SPI_MEM_OP_ADDR(2, addr, 2), \ SPI_MEM_OP_DUMMY(ndummy, 2), \ - SPI_MEM_OP_DATA_IN(len, buf, 2)) + SPI_MEM_OP_DATA_IN(len, buf, 2), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_2S_2S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xbb, 1), \ @@ -151,11 +152,12 @@ SPI_MEM_DTR_OP_DATA_IN(len, buf, 4), \ SPI_MEM_OP_MAX_FREQ(freq)) -#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len) \ +#define SPINAND_PAGE_READ_FROM_CACHE_1S_4S_4S_OP(addr, ndummy, buf, len, ...) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ SPI_MEM_OP_ADDR(2, addr, 4), \ SPI_MEM_OP_DUMMY(ndummy, 4), \ - SPI_MEM_OP_DATA_IN(len, buf, 4)) + SPI_MEM_OP_DATA_IN(len, buf, 4), \ + SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0)) #define SPINAND_PAGE_READ_FROM_CACHE_3A_1S_4S_4S_OP(addr, ndummy, buf, len) \ SPI_MEM_OP(SPI_MEM_OP_CMD(0xeb, 1), \ From 10af0273a35ab4513ca1546644b8c853044da134 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Fri, 13 Jun 2025 16:34:43 +0000 Subject: [PATCH 407/497] gpio: mlxbf3: only get IRQ for device instance 0 The gpio-mlxbf3 driver interfaces with two GPIO controllers, device instance 0 and 1. There is a single IRQ resource shared between the two controllers, and it is found in the ACPI table for device instance 0. The driver should not attempt to get an IRQ resource when probing device instance 1, otherwise the following error is logged: mlxbf3_gpio MLNXBF33:01: error -ENXIO: IRQ index 0 not found Signed-off-by: David Thompson Reviewed-by: Shravan Kumar Ramani Fixes: cd33f216d241 ("gpio: mlxbf3: Add gpio driver support") Link: https://lore.kernel.org/r/20250613163443.1065217-1-davthompson@nvidia.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mlxbf3.c | 52 +++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index 10ea71273c89..9875e34bde72 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -190,7 +190,9 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) struct mlxbf3_gpio_context *gs; struct gpio_irq_chip *girq; struct gpio_chip *gc; + char *colon_ptr; int ret, irq; + long num; gs = devm_kzalloc(dev, sizeof(*gs), GFP_KERNEL); if (!gs) @@ -227,25 +229,39 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) gc->owner = THIS_MODULE; gc->add_pin_ranges = mlxbf3_gpio_add_pin_ranges; - irq = platform_get_irq(pdev, 0); - if (irq >= 0) { - girq = &gs->gc.irq; - gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); - girq->default_type = IRQ_TYPE_NONE; - /* This will let us handle the parent IRQ in the driver */ - girq->num_parents = 0; - girq->parents = NULL; - girq->parent_handler = NULL; - girq->handler = handle_bad_irq; + colon_ptr = strchr(dev_name(dev), ':'); + if (!colon_ptr) { + dev_err(dev, "invalid device name format\n"); + return -EINVAL; + } - /* - * Directly request the irq here instead of passing - * a flow-handler because the irq is shared. - */ - ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, - IRQF_SHARED, dev_name(dev), gs); - if (ret) - return dev_err_probe(dev, ret, "failed to request IRQ"); + ret = kstrtol(++colon_ptr, 16, &num); + if (ret) { + dev_err(dev, "invalid device instance\n"); + return ret; + } + + if (!num) { + irq = platform_get_irq(pdev, 0); + if (irq >= 0) { + girq = &gs->gc.irq; + gpio_irq_chip_set_chip(girq, &gpio_mlxbf3_irqchip); + girq->default_type = IRQ_TYPE_NONE; + /* This will let us handle the parent IRQ in the driver */ + girq->num_parents = 0; + girq->parents = NULL; + girq->parent_handler = NULL; + girq->handler = handle_bad_irq; + + /* + * Directly request the irq here instead of passing + * a flow-handler because the irq is shared. + */ + ret = devm_request_irq(dev, irq, mlxbf3_gpio_irq_handler, + IRQF_SHARED, dev_name(dev), gs); + if (ret) + return dev_err_probe(dev, ret, "failed to request IRQ"); + } } platform_set_drvdata(pdev, gs); From e1c75831f682eef0f68b35723437146ed86070b1 Mon Sep 17 00:00:00 2001 From: Penglei Jiang Date: Tue, 17 Jun 2025 09:56:44 -0700 Subject: [PATCH 408/497] io_uring: fix potential page leak in io_sqe_buffer_register() If allocation of the 'imu' fails, then the existing pages aren't unpinned in the error path. This is mostly a theoretical issue, requiring fault injection to hit. Move unpin_user_pages() to unified error handling to fix the page leak issue. Fixes: d8c2237d0aa9 ("io_uring: add io_pin_pages() helper") Signed-off-by: Penglei Jiang Link: https://lore.kernel.org/r/20250617165644.79165-1-superman.xpt@gmail.com Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 94a9db030e0e..d724602697e7 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -809,10 +809,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, imu->nr_bvecs = nr_pages; ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); - if (ret) { - unpin_user_pages(pages, nr_pages); + if (ret) goto done; - } size = iov->iov_len; /* store original address for later verification */ @@ -842,6 +840,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx, if (ret) { if (imu) io_free_imu(ctx, imu); + if (pages) + unpin_user_pages(pages, nr_pages); io_cache_free(&ctx->node_cache, node); node = ERR_PTR(ret); } From 327e28664307d49ce3fa71ba30dcc0007c270974 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Jun 2025 07:38:01 -0400 Subject: [PATCH 409/497] fgraph: Do not enable function_graph tracer when setting funcgraph-args When setting the funcgraph-args option when function graph tracer is net enabled, it incorrectly enables it. Worse, it unregisters itself when it was never registered. Then when it gets enabled again, it will register itself a second time causing a WARNing. ~# echo 1 > /sys/kernel/tracing/options/funcgraph-args ~# head -20 /sys/kernel/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 813/26317372 #P:8 # # _-----=> irqs-off/BH-disabled # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / _-=> migrate-disable # |||| / delay # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | -0 [007] d..4. 358.966010: 7) 1.692 us | fetch_next_timer_interrupt(basej=4294981640, basem=357956000000, base_local=0xffff88823c3ae040, base_global=0xffff88823c3af300, tevt=0xffff888100e47cb8); -0 [007] d..4. 358.966012: 7) | tmigr_cpu_deactivate(nextexp=357988000000) { -0 [007] d..4. 358.966013: 7) | _raw_spin_lock(lock=0xffff88823c3b2320) { -0 [007] d..4. 358.966014: 7) 0.981 us | preempt_count_add(val=1); -0 [007] d..5. 358.966017: 7) 1.058 us | do_raw_spin_lock(lock=0xffff88823c3b2320); -0 [007] d..4. 358.966019: 7) 5.824 us | } -0 [007] d..5. 358.966021: 7) | tmigr_inactive_up(group=0xffff888100cb9000, child=0x0, data=0xffff888100e47bc0) { -0 [007] d..5. 358.966022: 7) | tmigr_update_events(group=0xffff888100cb9000, child=0x0, data=0xffff888100e47bc0) { Notice the "tracer: nop" at the top there. The current tracer is the "nop" tracer, but the content is obviously the function graph tracer. Enabling function graph tracing will cause it to register again and trigger a warning in the accounting: ~# echo function_graph > /sys/kernel/tracing/current_tracer -bash: echo: write error: Device or resource busy With the dmesg of: ------------[ cut here ]------------ WARNING: CPU: 7 PID: 1095 at kernel/trace/ftrace.c:3509 ftrace_startup_subops+0xc1e/0x1000 Modules linked in: kvm_intel kvm irqbypass CPU: 7 UID: 0 PID: 1095 Comm: bash Not tainted 6.16.0-rc2-test-00006-gea03de4105d3 #24 PREEMPT Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:ftrace_startup_subops+0xc1e/0x1000 Code: 48 b8 22 01 00 00 00 00 ad de 49 89 84 24 88 01 00 00 8b 44 24 08 89 04 24 e9 c3 f7 ff ff c7 04 24 ed ff ff ff e9 b7 f7 ff ff <0f> 0b c7 04 24 f0 ff ff ff e9 a9 f7 ff ff c7 04 24 f4 ff ff ff e9 RSP: 0018:ffff888133cff948 EFLAGS: 00010202 RAX: 0000000000000001 RBX: 1ffff1102679ff31 RCX: 0000000000000000 RDX: 1ffffffff0b27a60 RSI: ffffffff8593d2f0 RDI: ffffffff85941140 RBP: 00000000000c2041 R08: ffffffffffffffff R09: ffffed1020240221 R10: ffff88810120110f R11: ffffed1020240214 R12: ffffffff8593d2f0 R13: ffffffff8593d300 R14: ffffffff85941140 R15: ffffffff85631100 FS: 00007f7ec6f28740(0000) GS:ffff8882b5251000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f7ec6f181c0 CR3: 000000012f1d0005 CR4: 0000000000172ef0 Call Trace: ? __pfx_ftrace_startup_subops+0x10/0x10 ? find_held_lock+0x2b/0x80 ? ftrace_stub_direct_tramp+0x10/0x10 ? ftrace_stub_direct_tramp+0x10/0x10 ? trace_preempt_on+0xd0/0x110 ? __pfx_trace_graph_entry_args+0x10/0x10 register_ftrace_graph+0x4d2/0x1020 ? tracing_reset_online_cpus+0x14b/0x1e0 ? __pfx_register_ftrace_graph+0x10/0x10 ? ring_buffer_record_enable+0x16/0x20 ? tracing_reset_online_cpus+0x153/0x1e0 ? __pfx_tracing_reset_online_cpus+0x10/0x10 ? __pfx_trace_graph_return+0x10/0x10 graph_trace_init+0xfd/0x160 tracing_set_tracer+0x500/0xa80 ? __pfx_tracing_set_tracer+0x10/0x10 ? lock_release+0x181/0x2d0 ? _copy_from_user+0x26/0xa0 tracing_set_trace_write+0x132/0x1e0 ? __pfx_tracing_set_trace_write+0x10/0x10 ? ftrace_graph_func+0xcc/0x140 ? ftrace_stub_direct_tramp+0x10/0x10 ? ftrace_stub_direct_tramp+0x10/0x10 ? ftrace_stub_direct_tramp+0x10/0x10 vfs_write+0x1d0/0xe90 ? __pfx_vfs_write+0x10/0x10 Have the setting of the funcgraph-args check if function_graph tracer is the current tracer of the instance, and if not, do nothing, as there's nothing to do (the option is checked when function_graph tracing starts). Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Cc: Mark Rutland Link: https://lore.kernel.org/20250618073801.057ea636@gandalf.local.home Fixes: c7a60a733c373 ("ftrace: Have funcgraph-args take affect during tracing") Closes: https://lore.kernel.org/all/4ab1a7bdd0174ab09c7b0d68cdbff9a4@huawei.com/ Reported-by: Changbin Du Tested-by: Changbin Du Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 9234e2c39abf..14d74a7491b8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -455,10 +455,16 @@ static int graph_trace_init(struct trace_array *tr) return 0; } +static struct tracer graph_trace; + static int ftrace_graph_trace_args(struct trace_array *tr, int set) { trace_func_graph_ent_t entry; + /* Do nothing if the current tracer is not this tracer */ + if (tr->current_trace != &graph_trace) + return 0; + if (set) entry = trace_graph_entry_args; else From 2aebf5ee43bf0ed225a09a30cf515d9f2813b759 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Wed, 18 Jun 2025 09:05:23 +0900 Subject: [PATCH 410/497] x86/alternatives: Fix int3 handling failure from broken text_poke array Since smp_text_poke_single() does not expect there is another text_poke request is queued, it can make text_poke_array not sorted or cause a buffer overflow on the text_poke_array.vec[]. This will cause an Oops in int3 because of bsearch failing; CPU 0 CPU 1 CPU 2 ----- ----- ----- smp_text_poke_batch_add() smp_text_poke_single() <<-- Adds out of order [Fails o find address in text_poke_array ] OOPS! Or unhandled page fault because of a buffer overflow; CPU 0 CPU 1 ----- ----- smp_text_poke_batch_add() <<+ ... | smp_text_poke_batch_add() <<-- Adds TEXT_POKE_ARRAY_MAX times. smp_text_poke_single() { __smp_text_poke_batch_add() <<-- Adds entry at TEXT_POKE_ARRAY_MAX + 1 smp_text_poke_batch_finish() [Unhandled page fault because text_poke_array.nr_entries is overwritten] BUG! } Use smp_text_poke_batch_add() instead of __smp_text_poke_batch_add() so that it correctly flush the queue if needed. Closes: https://lore.kernel.org/all/CA+G9fYsLu0roY3DV=tKyqP7FEKbOEETRvTDhnpPxJGbA=Cg+4w@mail.gmail.com/ Fixes: c8976ade0c1b ("x86/alternatives: Simplify smp_text_poke_single() by using tp_vec and existing APIs") Reported-by: Linux Kernel Functional Testing Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Tested-by: Linux Kernel Functional Testing Link: https://lkml.kernel.org/r/\ 175020512308.3582717.13631440385506146631.stgit@mhiramat.tok.corp.google.com --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 9ae80fa904a2..ea1d984166cd 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -3138,6 +3138,6 @@ void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, c */ void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate) { - __smp_text_poke_batch_add(addr, opcode, len, emulate); + smp_text_poke_batch_add(addr, opcode, len, emulate); smp_text_poke_batch_finish(); } From eab9dcb76b9fca47402c9e93afca243e745a0f02 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sat, 14 Jun 2025 20:59:24 +0530 Subject: [PATCH 411/497] Documentation: embargoed-hardware-issues.rst: Add myself for Power Adding myself as the contact for Power Signed-off-by: Madhavan Srinivasan Link: https://lore.kernel.org/r/20250614152925.82831-1-maddy@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- Documentation/process/embargoed-hardware-issues.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index da6bf0f6d01e..34e00848e0da 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -290,6 +290,7 @@ an involved disclosed party. The current ambassadors list: AMD Tom Lendacky Ampere Darren Hart ARM Catalin Marinas + IBM Power Madhavan Srinivasan IBM Z Christian Borntraeger Intel Tony Luck Qualcomm Trilok Soni From bbf10cd686835d5a4b8566dc73a3b00b4cd7932a Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 18 Jun 2025 16:38:25 +0200 Subject: [PATCH 412/497] PCI: pciehp: Ignore belated Presence Detect Changed caused by DPC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit c3be50f7547c ("PCI: pciehp: Ignore Presence Detect Changed caused by DPC") sought to ignore Presence Detect Changed events occurring as a side effect of Downstream Port Containment. The commit awaits recovery from DPC and then clears events which occurred in the meantime. However if the first event seen after DPC is Data Link Layer State Changed, only that event is cleared and not Presence Detect Changed. The object of the commit is thus defeated. That's because pciehp_ist() computes the events to clear based on the local "events" variable instead of "ctrl->pending_events". The former contains the events that had occurred when pciehp_ist() was entered, whereas the latter also contains events that have accumulated while awaiting DPC recovery. In practice, the order of PDC and DLLSC events is arbitrary and the delay in-between can be several milliseconds. So change the logic to always clear PDC events, even if they come after an initial DLLSC event. Fixes: c3be50f7547c ("PCI: pciehp: Ignore Presence Detect Changed caused by DPC") Reported-by: Lương Việt Hoàng Reported-by: Joel Mathew Thomas Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765#c165 Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Tested-by: Lương Việt Hoàng Tested-by: Joel Mathew Thomas Link: https://patch.msgid.link/d9c4286a16253af7e93eaf12e076e3ef3546367a.1750257164.git.lukas@wunner.de --- drivers/pci/hotplug/pciehp_hpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index ebd342bda235..91d2d92717d9 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -771,7 +771,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; if (!ctrl->inband_presence_disabled) - ignored_events |= events & PCI_EXP_SLTSTA_PDC; + ignored_events |= PCI_EXP_SLTSTA_PDC; events &= ~ignored_events; pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); From a55737dab6ba63eb4241e9c6547629058af31e12 Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Thu, 29 May 2025 11:27:37 +0800 Subject: [PATCH 413/497] drm/amdkfd: move SDMA queue reset capability check to node_show Relocate the per-SDMA queue reset capability check from kfd_topology_set_capabilities() to node_show() to ensure we read the latest value of sdma.supported_reset after all IP blocks are initialized. Fixes: ceb7114c961b ("drm/amdkfd: flag per-sdma queue reset supported to user space") Reviewed-by: Jonathan Kim Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit e17df7b086cf908cedd919f448da9e00028419bb) --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index baa2374acdeb..4ec73f33535e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -510,6 +510,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.capability |= HSA_CAP_AQL_QUEUE_DOUBLE_MAP; + if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0) && + (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; + sysfs_show_32bit_prop(buffer, offs, "max_engine_clk_fcompute", dev->node_props.max_engine_clk_fcompute); @@ -2008,8 +2012,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev) if (!amdgpu_sriov_vf(dev->gpu->adev)) dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED; - if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) - dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED; } else { dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 | HSA_DBG_WATCH_ADDR_MASK_HI_BIT; From 3251b69b7efb82eba24c9e168a6142a3078de72f Mon Sep 17 00:00:00 2001 From: Peichen Huang Date: Mon, 26 May 2025 16:04:10 +0800 Subject: [PATCH 414/497] drm/amd/display: Add dc cap for dp tunneling [WHAT] 1. add dc cap for dp tunneling 2. add function to get index of host router Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Cruise Hung Signed-off-by: Peichen Huang Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 29e178d13979cf6fdb42c5fe2dfec2da2306c4ad) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 33 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 8 ++++- .../dc/resource/dcn31/dcn31_resource.c | 3 ++ .../dc/resource/dcn314/dcn314_resource.c | 3 ++ .../dc/resource/dcn35/dcn35_resource.c | 3 ++ .../dc/resource/dcn351/dcn351_resource.c | 3 ++ .../dc/resource/dcn36/dcn36_resource.c | 3 ++ 7 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 56d011a1323c..b34b5b52236d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -241,6 +241,7 @@ static bool create_links( DC_LOG_DC("BIOS object table - end"); /* Create a link for each usb4 dpia port */ + dc->lowest_dpia_link_index = MAX_LINKS; for (i = 0; i < dc->res_pool->usb4_dpia_count; i++) { struct link_init_data link_init_params = {0}; struct dc_link *link; @@ -253,6 +254,9 @@ static bool create_links( link = dc->link_srv->create_link(&link_init_params); if (link) { + if (dc->lowest_dpia_link_index > dc->link_count) + dc->lowest_dpia_link_index = dc->link_count; + dc->links[dc->link_count] = link; link->dc = dc; ++dc->link_count; @@ -6376,6 +6380,35 @@ unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context) else return 0; } +/** + *********************************************************************************************** + * dc_get_host_router_index: Get index of host router from a dpia link + * + * This function return a host router index of the target link. If the target link is dpia link. + * + * @param [in] link: target link + * @param [out] host_router_index: host router index of the target link + * + * @return: true if the host router index is found and valid. + * + *********************************************************************************************** + */ +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index) +{ + struct dc *dc = link->ctx->dc; + + if (link->ep_type != DISPLAY_ENDPOINT_USB4_DPIA) + return false; + + if (link->link_index < dc->lowest_dpia_link_index) + return false; + + *host_router_index = (link->link_index - dc->lowest_dpia_link_index) / dc->caps.num_of_dpias_per_host_router; + if (*host_router_index < dc->caps.num_of_host_routers) + return true; + else + return false; +} bool dc_is_cursor_limit_pending(struct dc *dc) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 1d917be36fc4..f41073c0147e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -66,7 +66,8 @@ struct dmub_notification; #define MAX_STREAMS 6 #define MIN_VIEWPORT_SIZE 12 #define MAX_NUM_EDP 2 -#define MAX_HOST_ROUTERS_NUM 2 +#define MAX_HOST_ROUTERS_NUM 3 +#define MAX_DPIA_PER_HOST_ROUTER 2 /* Display Core Interfaces */ struct dc_versions { @@ -305,6 +306,8 @@ struct dc_caps { /* Conservative limit for DCC cases which require ODM4:1 to support*/ uint32_t dcc_plane_width_limit; struct dc_scl_caps scl_caps; + uint8_t num_of_host_routers; + uint8_t num_of_dpias_per_host_router; }; struct dc_bug_wa { @@ -1603,6 +1606,7 @@ struct dc { uint8_t link_count; struct dc_link *links[MAX_LINKS]; + uint8_t lowest_dpia_link_index; struct link_service *link_srv; struct dc_state *current_state; @@ -2595,6 +2599,8 @@ struct dc_power_profile dc_get_power_profile_for_dc_state(const struct dc_state unsigned int dc_get_det_buffer_size_from_state(const struct dc_state *context); +bool dc_get_host_router_index(const struct dc_link *link, unsigned int *host_router_index); + /* DSC Interfaces */ #include "dc_dsc.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 7e0af5297dc4..51ca0b2959fc 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -1954,6 +1954,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; dc->config.disable_hbr_audio_dp2 = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index d96bc6cb73ad..8383e2e59be5 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -1885,6 +1885,9 @@ static bool dcn314_resource_construct( dc->caps.max_disp_clock_khz_at_vmin = 650000; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* Use pipe context based otg sync logic */ dc->config.use_pipe_ctx_sync_logic = true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index 72c6cf047db0..e01aa2f2e13e 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -1894,6 +1894,9 @@ static bool dcn35_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 989a270f7dea..4ebe4e00a4f8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -1866,6 +1866,9 @@ static bool dcn351_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for furture ASIC to have equal or higher value, in order to diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index 48e1f234185f..db36b8f9ce65 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -1867,6 +1867,9 @@ static bool dcn36_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + dc->caps.num_of_host_routers = 2; + dc->caps.num_of_dpias_per_host_router = 2; + /* max_disp_clock_khz_at_vmin is slightly lower than the STA value in order * to provide some margin. * It's expected for furture ASIC to have equal or higher value, in order to From d358a51444c88bcc995e471dc8cc840f19e4b374 Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Wed, 26 Feb 2025 10:03:48 -0500 Subject: [PATCH 415/497] drm/amd/display: Get LTTPR IEEE OUI/Device ID From Closest LTTPR To Host [WHY] These fields are read for the explicit purpose of detecting embedded LTTPRs (i.e. between host ASIC and the user-facing port), and thus need to calculate the correct DPCD address offset based on LTTPR count to target the appropriate LTTPR's DPCD register space with these queries. [HOW] Cascaded LTTPRs in a link each snoop and increment LTTPR count when queried via DPCD read, so an LTTPR embedded in a source device (e.g. USB4 port on a laptop) will always be addressible using the max LTTPR count seen by the host. Therefore we simply need to use a recently added helper function to calculate the correct DPCD address to target potentially embedded LTTPRs based on the received LTTPR count. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Wenjing Liu Signed-off-by: Michael Strauss Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 791897f5c77a2a65d0e500be4743af2ddf6eb061) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dc_dp_types.h | 4 +- .../dc/link/protocols/link_dp_capability.c | 38 +++++++++++++++---- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 0bad8304ccf6..d346f8ae1634 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -1172,8 +1172,8 @@ struct dc_lttpr_caps { union dp_128b_132b_supported_lttpr_link_rates supported_128b_132b_rates; union dp_alpm_lttpr_cap alpm; uint8_t aux_rd_interval[MAX_REPEATER_CNT - 1]; - uint8_t lttpr_ieee_oui[3]; - uint8_t lttpr_device_id[6]; + uint8_t lttpr_ieee_oui[3]; // Always read from closest LTTPR to host + uint8_t lttpr_device_id[6]; // Always read from closest LTTPR to host }; struct dc_dongle_dfp_cap_ext { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index a5127c2d47ef..0f965380a9b4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -385,9 +385,15 @@ bool dp_is_128b_132b_signal(struct pipe_ctx *pipe_ctx) bool dp_is_lttpr_present(struct dc_link *link) { /* Some sink devices report invalid LTTPR revision, so don't validate against that cap */ - return (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) != 0 && + uint32_t lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + bool is_lttpr_present = (lttpr_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count > 0 && link->dpcd_caps.lttpr_caps.max_lane_count <= 4); + + if (lttpr_count > 0 && !is_lttpr_present) + DC_LOG_ERROR("LTTPR count is nonzero but invalid lane count reported. Assuming no LTTPR present.\n"); + + return is_lttpr_present; } /* in DP compliance test, DPR-120 may have @@ -1551,6 +1557,8 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) uint8_t lttpr_dpcd_data[10] = {0}; enum dc_status status; bool is_lttpr_present; + uint32_t lttpr_count; + uint32_t closest_lttpr_offset; /* Logic to determine LTTPR support*/ bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware; @@ -1602,20 +1610,22 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) lttpr_dpcd_data[DP_LTTPR_ALPM_CAPABILITIES - DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV]; + lttpr_count = dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt); + /* If this chip cap is set, at least one retimer must exist in the chain * Override count to 1 if we receive a known bad count (0 or an invalid value) */ if (((link->chip_caps & AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK) == AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN) && - (dp_parse_lttpr_repeater_count(link->dpcd_caps.lttpr_caps.phy_repeater_cnt) == 0)) { + lttpr_count == 0) { /* If you see this message consistently, either the host platform has FIXED_VS flag * incorrectly configured or the sink device is returning an invalid count. */ DC_LOG_ERROR("lttpr_caps phy_repeater_cnt is 0x%x, forcing it to 0x80.", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); link->dpcd_caps.lttpr_caps.phy_repeater_cnt = 0x80; + lttpr_count = 1; DC_LOG_DC("lttpr_caps forced phy_repeater_cnt = %d\n", link->dpcd_caps.lttpr_caps.phy_repeater_cnt); } - /* Attempt to train in LTTPR transparent mode if repeater count exceeds 8. */ is_lttpr_present = dp_is_lttpr_present(link); DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present); @@ -1623,11 +1633,25 @@ enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link) if (is_lttpr_present) { CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: "); - core_link_read_dpcd(link, DP_LTTPR_IEEE_OUI, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), "LTTPR IEEE OUI: "); + // Identify closest LTTPR to determine if workarounds required for known embedded LTTPR + closest_lttpr_offset = dp_get_closest_lttpr_offset(lttpr_count); - core_link_read_dpcd(link, DP_LTTPR_DEVICE_ID, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); - CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), "LTTPR Device ID: "); + core_link_read_dpcd(link, (DP_LTTPR_IEEE_OUI + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui)); + core_link_read_dpcd(link, (DP_LTTPR_DEVICE_ID + closest_lttpr_offset), + link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id)); + + if (lttpr_count > 1) { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "Closest LTTPR To Host's IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + "Closest LTTPR To Host's LTTPR Device ID: "); + } else { + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_ieee_oui, sizeof(link->dpcd_caps.lttpr_caps.lttpr_ieee_oui), + "LTTPR IEEE OUI: "); + CONN_DATA_DETECT(link, link->dpcd_caps.lttpr_caps.lttpr_device_id, sizeof(link->dpcd_caps.lttpr_caps.lttpr_device_id), + "LTTPR Device ID: "); + } } return status; From 0d57dd1765d311111d9885346108c4deeae1deb4 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 21 May 2025 16:40:25 -0400 Subject: [PATCH 416/497] drm/amd/display: Add more checks for DSC / HUBP ONO guarantees [WHY] For non-zero DSC instances it's possible that the HUBP domain required to drive it for sequential ONO ASICs isn't met, potentially causing the logic to the tile to enter an undefined state leading to a system hang. [HOW] Add more checks to ensure that the HUBP domain matching the DSC instance is appropriately powered. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Duncan Ma Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit da63df07112e5a9857a8d2aaa04255c4206754ec) Cc: stable@vger.kernel.org --- .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index c814d957305a..a267f574b619 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1047,6 +1047,15 @@ void dcn35_calc_blocks_to_gate(struct dc *dc, struct dc_state *context, if (dc->caps.sequential_ono) { update_state->pg_pipe_res_update[PG_HUBP][pipe_ctx->stream_res.dsc->inst] = false; update_state->pg_pipe_res_update[PG_DPP][pipe_ctx->stream_res.dsc->inst] = false; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (!pipe_ctx->top_pipe && pipe_ctx->plane_res.hubp && + pipe_ctx->plane_res.hubp->inst != pipe_ctx->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = false; + update_state->pg_pipe_res_update[PG_DPP][j] = false; + } + } } } @@ -1193,6 +1202,25 @@ void dcn35_calc_blocks_to_ungate(struct dc *dc, struct dc_state *context, update_state->pg_pipe_res_update[PG_HDMISTREAM][0] = true; if (dc->caps.sequential_ono) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + + if (new_pipe->stream_res.dsc && !new_pipe->top_pipe && + update_state->pg_pipe_res_update[PG_DSC][new_pipe->stream_res.dsc->inst]) { + update_state->pg_pipe_res_update[PG_HUBP][new_pipe->stream_res.dsc->inst] = true; + update_state->pg_pipe_res_update[PG_DPP][new_pipe->stream_res.dsc->inst] = true; + + /* All HUBP/DPP instances must be powered if the DSC inst != HUBP inst */ + if (new_pipe->plane_res.hubp && + new_pipe->plane_res.hubp->inst != new_pipe->stream_res.dsc->inst) { + for (j = 0; j < dc->res_pool->pipe_count; ++j) { + update_state->pg_pipe_res_update[PG_HUBP][j] = true; + update_state->pg_pipe_res_update[PG_DPP][j] = true; + } + } + } + } + for (i = dc->res_pool->pipe_count - 1; i >= 0; i--) { if (update_state->pg_pipe_res_update[PG_HUBP][i] && update_state->pg_pipe_res_update[PG_DPP][i]) { From 8724a5380c4390eed81e271d22f34ff06453ded9 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 29 May 2025 10:59:19 -0600 Subject: [PATCH 417/497] drm/amd/display: Fix mpv playback corruption on weston [WHAT] Severe video playback corruption is observed in the following setup: weston 14.0.90 (built from source) + mpv v0.40.0 with command: mpv bbb_sunflower_1080p_60fps_normal.mp4 --vo=gpu [HOW] ABGR16161616 needs to be included in dml2/2.1 translation. Cc: Mario Limonciello Cc: Alex Deucher Acked-by: Aurabindo Pillai Reviewed-by: Harry Wentland Reviewed-by: Austin Zheng Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit d023de809f85307ca819a9dbbceee6ae1f50e2ad) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 1 + drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index d47cacfdb695..2aa6d44bb359 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -788,6 +788,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm plane->pixel_format = dml2_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: plane->pixel_format = dml2_444_64; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index 5de775fd8fce..208630754c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -953,6 +953,7 @@ static void populate_dml_surface_cfg_from_plane_state(enum dml_project_id dml2_p out->SourcePixelFormat[location] = dml_420_10; break; case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: + case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616: case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: out->SourcePixelFormat[location] = dml_444_64; From 158f9944ac05dafd2d3a23d0688e6cf40ef68b90 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Tue, 27 May 2025 16:47:40 -0400 Subject: [PATCH 418/497] drm/amd/display: Fix RMCM programming seq errors [WHY & HOW] Fix RMCM programming sequence errors and mapping issues to pass the RMCM test. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Dmytro Laktyushkin Signed-off-by: Yihan Zhu Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 11baa4975025033547f45f5894087a0dda6efbb8) Cc: stable@vger.kernel.org --- .../dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index c4dad7164d31..5b62cd19d979 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -4685,7 +4685,10 @@ static void calculate_tdlut_setting( //the tdlut is fetched during the 2 row times of prefetch. if (p->setup_for_tdlut) { *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1); - *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + if (*p->tdlut_bytes_per_frame > p->cursor_buffer_size * 1024) + *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate; + else + *p->tdlut_opt_time = 0; *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate; *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0); } From ffcaed1d7ecef31198000dfbbea791f30f7ca437 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 29 May 2025 11:33:44 -0500 Subject: [PATCH 419/497] drm/amd/display: Only read ACPI backlight caps once [WHY] Backlight caps are read already in amdgpu_dm_update_backlight_caps(). They may be updated by update_connector_ext_caps(). Reading again when registering backlight device may cause wrong values to be used. [HOW] Use backlight caps already registered to the dm. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Roman Li Signed-off-by: Mario Limonciello Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 148144f6d2f14b02eaaa39b86bbe023cbff350bd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3100f641ac6..0b5d5ab14a69 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4908,7 +4908,7 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) struct drm_device *drm = aconnector->base.dev; struct amdgpu_display_manager *dm = &drm_to_adev(drm)->dm; struct backlight_properties props = { 0 }; - struct amdgpu_dm_backlight_caps caps = { 0 }; + struct amdgpu_dm_backlight_caps *caps; char bl_name[16]; int min, max; @@ -4922,20 +4922,20 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) return; } - amdgpu_acpi_get_backlight_caps(&caps); - if (caps.caps_valid && get_brightness_range(&caps, &min, &max)) { + caps = &dm->backlight_caps[aconnector->bl_idx]; + if (get_brightness_range(caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.ac_level, 100); + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->ac_level, 100); else - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps.dc_level, 100); + props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->dc_level, 100); /* min is zero, so max needs to be adjusted */ props.max_brightness = max - min; drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, - caps.ac_level, caps.dc_level); + caps->ac_level, caps->dc_level); } else props.brightness = AMDGPU_MAX_BL_LEVEL; - if (caps.data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) + if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) drm_info(drm, "Using custom brightness curve\n"); props.max_brightness = AMDGPU_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; From 16dc8bc27c2aa3c93905d3e885e27f1e3535f09a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 29 May 2025 09:46:32 -0500 Subject: [PATCH 420/497] drm/amd/display: Export full brightness range to userspace [WHY] Userspace currently is offered a range from 0-0xFF but the PWM is programmed from 0-0xFFFF. This can be limiting to some software that wants to apply greater granularity. [HOW] Convert internally to firmware values only when mapping custom brightness curves because these are in 0-0xFF range. Advertise full PWM range to userspace. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Roman Li Signed-off-by: Mario Limonciello Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 8dbd72cb790058ce52279af38a43c2b302fdd3e5) Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0b5d5ab14a69..bc4cd11bfc79 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4718,9 +4718,23 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, return 1; } -static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, - uint32_t *brightness) +/* Rescale from [min..max] to [0..AMDGPU_MAX_BL_LEVEL] */ +static inline u32 scale_input_to_fw(int min, int max, u64 input) { + return DIV_ROUND_CLOSEST_ULL(input * AMDGPU_MAX_BL_LEVEL, max - min); +} + +/* Rescale from [0..AMDGPU_MAX_BL_LEVEL] to [min..max] */ +static inline u32 scale_fw_to_input(int min, int max, u64 input) +{ + return min + DIV_ROUND_CLOSEST_ULL(input * (max - min), AMDGPU_MAX_BL_LEVEL); +} + +static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *caps, + unsigned int min, unsigned int max, + uint32_t *user_brightness) +{ + u32 brightness = scale_input_to_fw(min, max, *user_brightness); u8 prev_signal = 0, prev_lum = 0; int i = 0; @@ -4731,7 +4745,7 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap return; /* choose start to run less interpolation steps */ - if (caps->luminance_data[caps->data_points/2].input_signal > *brightness) + if (caps->luminance_data[caps->data_points/2].input_signal > brightness) i = caps->data_points/2; do { u8 signal = caps->luminance_data[i].input_signal; @@ -4742,17 +4756,18 @@ static void convert_custom_brightness(const struct amdgpu_dm_backlight_caps *cap * brightness < signal: interpolate between previous and current luminance numerator * brightness > signal: find next data point */ - if (*brightness > signal) { + if (brightness > signal) { prev_signal = signal; prev_lum = lum; i++; continue; } - if (*brightness < signal) + if (brightness < signal) lum = prev_lum + DIV_ROUND_CLOSEST((lum - prev_lum) * - (*brightness - prev_signal), + (brightness - prev_signal), signal - prev_signal); - *brightness = DIV_ROUND_CLOSEST(lum * *brightness, 101); + *user_brightness = scale_fw_to_input(min, max, + DIV_ROUND_CLOSEST(lum * brightness, 101)); return; } while (i < caps->data_points); } @@ -4765,11 +4780,10 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c if (!get_brightness_range(caps, &min, &max)) return brightness; - convert_custom_brightness(caps, &brightness); + convert_custom_brightness(caps, min, max, &brightness); - // Rescale 0..255 to min..max - return min + DIV_ROUND_CLOSEST((max - min) * brightness, - AMDGPU_MAX_BL_LEVEL); + // Rescale 0..max to min..max + return min + DIV_ROUND_CLOSEST_ULL((u64)(max - min) * brightness, max); } static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps, @@ -4782,8 +4796,8 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap if (brightness < min) return 0; - // Rescale min..max to 0..255 - return DIV_ROUND_CLOSEST(AMDGPU_MAX_BL_LEVEL * (brightness - min), + // Rescale min..max to 0..max + return DIV_ROUND_CLOSEST_ULL((u64)max * (brightness - min), max - min); } @@ -4933,11 +4947,10 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, caps->ac_level, caps->dc_level); } else - props.brightness = AMDGPU_MAX_BL_LEVEL; + props.brightness = props.max_brightness = AMDGPU_MAX_BL_LEVEL; if (caps->data_points && !(amdgpu_dc_debug_mask & DC_DISABLE_CUSTOM_BRIGHTNESS_CURVE)) drm_info(drm, "Using custom brightness curve\n"); - props.max_brightness = AMDGPU_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; snprintf(bl_name, sizeof(bl_name), "amdgpu_bl%d", From 0bbf5fd86c585d437b75003f11365b324360a5d6 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 4 Jun 2025 21:00:44 +0800 Subject: [PATCH 421/497] drm/amdgpu: Add kicker device detection 1. add kicker device list 2. add kicker device checking helper function Signed-off-by: Frank Min Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit 09aa2b408f4ab689c3541d22b0968de0392ee406) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 2505c46a9c3d..eaddc441c51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -30,6 +30,10 @@ #define AMDGPU_UCODE_NAME_MAX (128) +static const struct kicker_device kicker_device_list[] = { + {0x744B, 0x00}, +}; + static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) { DRM_DEBUG("size_bytes: %u\n", le32_to_cpu(hdr->size_bytes)); @@ -1387,6 +1391,19 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return NULL; } +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kicker_device_list); i++) { + if (adev->pdev->device == kicker_device_list[i].device && + adev->pdev->revision == kicker_device_list[i].revision) + return true; + } + + return false; +} + void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) { int maj, min, rev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 9e89c3487be5..6349aad6da35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -605,6 +605,11 @@ struct amdgpu_firmware { uint32_t pldm_version; }; +struct kicker_device{ + unsigned short device; + u8 revision; +}; + void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_imu_hdr(const struct common_firmware_header *hdr); @@ -632,5 +637,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); +bool amdgpu_is_kicker_fw(struct amdgpu_device *adev); #endif From 854171405e7f093532b33d8ed0875b9e34fc55b4 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 4 Jun 2025 21:17:05 +0800 Subject: [PATCH 422/497] drm/amdgpu: add kicker fws loading for gfx11/smu13/psp13 1. Add kicker firmwares loading for gfx11/smu13/psp13 2. Register additional MODULE_FIRMWARE entries for kicker fws - gc_11_0_0_rlc_kicker.bin - gc_11_0_0_imu_kicker.bin - psp_13_0_0_sos_kicker.bin - psp_13_0_0_ta_kicker.bin - smu_13_0_0_kicker.bin Signed-off-by: Frank Min Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit fb5ec2174d70a8989bc207d257db90ffeca3b163) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 16 ++++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 12 +++++++++--- 5 files changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e6f0b035e20b..c14f63cefe67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3522,8 +3522,12 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_sos.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3799,8 +3803,12 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_ta.bin", chip_name); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta_kicker.bin", chip_name); + else + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index afd6d59164bf..ec9b84f92d46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -85,6 +85,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); @@ -759,6 +760,10 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); + else if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, AMDGPU_UCODE_REQUIRED, diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index cfa91d709d49..cc626036ed9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -32,6 +32,7 @@ #include "gc/gc_11_0_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); @@ -51,8 +52,12 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index df612fd9cc50..ead616c11705 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -42,7 +42,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index a7167668d189..1c7235935d14 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -58,6 +58,7 @@ MODULE_FIRMWARE("amdgpu/aldebaran_smc.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_0.bin"); +MODULE_FIRMWARE("amdgpu/smu_13_0_0_kicker.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_7.bin"); MODULE_FIRMWARE("amdgpu/smu_13_0_10.bin"); @@ -92,7 +93,7 @@ const int pmfw_decoded_link_width[7] = {0, 1, 2, 4, 8, 12, 16}; int smu_v13_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -103,8 +104,13 @@ int smu_v13_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s.bin", ucode_prefix); + + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; From 09b585592fa481384597c81388733aed4a04dd05 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 11 Jun 2025 15:02:09 +0800 Subject: [PATCH 423/497] drm/amdgpu: Fix SDMA engine reset with logical instance ID This commit makes the following improvements to SDMA engine reset handling: 1. Clarifies in the function documentation that instance_id refers to a logical ID 2. Adds conversion from logical to physical instance ID before performing reset using GET_INST(SDMA0, instance_id) macro 3. Improves error messaging to indicate when a logical instance reset fails 4. Adds better code organization with blank lines for readability The change ensures proper SDMA engine reset by using the correct physical instance ID while maintaining the logical ID interface for callers. V2: Remove harvest_config check and convert directly to physical instance (Lijo) Suggested-by: Jonathan Kim Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 5efa6217c239ed1ceec0f0414f9b6f6927387dfc) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 6716ac281c49..9b54a1ece447 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -540,8 +540,10 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 4): case IP_VERSION(4, 4, 5): - /* For SDMA 4.x, use the existing DPM interface for backward compatibility */ - r = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + /* For SDMA 4.x, use the existing DPM interface for backward compatibility, + * we need to convert the logical instance ID to physical instance ID before reset. + */ + r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, instance_id)); break; case IP_VERSION(5, 0, 0): case IP_VERSION(5, 0, 1): @@ -568,7 +570,7 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) /** * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device - * @instance_id: ID of the SDMA engine instance to reset + * @instance_id: Logical ID of the SDMA engine instance to reset * * Returns: 0 on success, or a negative error code on failure. */ @@ -601,7 +603,7 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) /* Perform the SDMA reset for the specified instance */ ret = amdgpu_sdma_soft_reset(adev, instance_id); if (ret) { - dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); + dev_err(adev->dev, "Failed to reset SDMA logical instance %u\n", instance_id); goto exit; } From caade9d69f2e2b76d2e2d47089736a99135b8772 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 11 Jun 2025 15:07:11 +0800 Subject: [PATCH 424/497] drm/amdgpu: Use logical instance ID for SDMA v4_4_2 queue operations Simplify SDMA v4_4_2 queue reset and stop operations by: 1. Removing GET_INST(SDMA0) conversion for ring->me 2. Using the logical instance ID (ring->me) directly 3. Maintaining consistent behavior with other SDMA queue operations This change aligns with the existing queue handling logic where ring->me already represents the correct instance identifier. Signed-off-by: Lijo Lazar Signed-off-by: Jesse Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 3bab282dfe25dff7a55add432f56898505a6cc6c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 9c169112a5e7..3de125062ee3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1670,7 +1670,7 @@ static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring) static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - u32 id = GET_INST(SDMA0, ring->me); + u32 id = ring->me; int r; if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) @@ -1686,7 +1686,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 instance_id = GET_INST(SDMA0, ring->me); + u32 instance_id = ring->me; u32 inst_mask; uint64_t rptr; From 46e15197b513e60786a44107759d6ca293d6288c Mon Sep 17 00:00:00 2001 From: Sonny Jiang Date: Thu, 12 Jun 2025 11:01:08 -0400 Subject: [PATCH 425/497] drm/amdgpu: VCN v5_0_1 to prevent FW checking RB during DPG pause Add a protection to ensure programming are all complete prior VCPU starting. This is a WA for an unintended VCPU running. Signed-off-by: Sonny Jiang Acked-by: Leo Liu Reviewed-by: Ruijing Dong Signed-off-by: Alex Deucher (cherry picked from commit c29521b529fa5e225feaf709d863a636ca0cbbfa) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c index 338cf43c45fe..cdefd7fcb0da 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -669,6 +669,9 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + /* resetting ring, fw should not check RB ring */ + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + /* Pause dpg */ vcn_v5_0_1_pause_dpg_mode(vinst, &state); @@ -681,7 +684,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); @@ -692,6 +695,7 @@ static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB1_EN_MASK; WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting done, fw can check RB ring */ fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, From b669507b637eb6b1aaecf347f193efccc65d756e Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 3 Jun 2025 18:30:55 -0600 Subject: [PATCH 426/497] drm/amd/display: Check dce_hwseq before dereferencing it [WHAT] hws was checked for null earlier in dce110_blank_stream, indicating hws can be null, and should be checked whenever it is used. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Hung Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit 79db43611ff61280b6de58ce1305e0b2ecf675ad) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index e8730cc40edb..38e17b1796e1 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1225,7 +1225,7 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) return; if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { - if (!link->skip_implict_edp_power_control) + if (!link->skip_implict_edp_power_control && hws) hws->funcs.edp_backlight_control(link, false); link->dc->hwss.set_abm_immediate_disable(pipe_ctx); } From 785c536c31c0bb057252fddedb30d79ae4979f4b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 6 Jun 2025 10:29:28 +0530 Subject: [PATCH 427/497] drm/amdgpu: Release reset locks during failures Make sure to release reset domain lock in case of failures. Signed-off-by: Lijo Lazar Signed-off-by: Ce Sun Reviewed-by: Hawking Zhang Fixes: 11bb33766f66 ("drm/amdgpu: refactor amdgpu_device_gpu_recover") Signed-off-by: Alex Deucher (cherry picked from commit 1ab11a82681eb33a66f423216cb063e7f40c6f85) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 80 +++++++++++++++------- 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e1bab6a96cb6..500bf85b3ee8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6019,16 +6019,12 @@ static int amdgpu_device_health_check(struct list_head *device_list_handle) return ret; } -static int amdgpu_device_halt_activities(struct amdgpu_device *adev, - struct amdgpu_job *job, - struct amdgpu_reset_context *reset_context, - struct list_head *device_list, - struct amdgpu_hive_info *hive, - bool need_emergency_restart) +static int amdgpu_device_recovery_prepare(struct amdgpu_device *adev, + struct list_head *device_list, + struct amdgpu_hive_info *hive) { - struct list_head *device_list_handle = NULL; struct amdgpu_device *tmp_adev = NULL; - int i, r = 0; + int r; /* * Build list of devices to reset. @@ -6045,26 +6041,54 @@ static int amdgpu_device_halt_activities(struct amdgpu_device *adev, } if (!list_is_first(&adev->reset_list, device_list)) list_rotate_to_front(&adev->reset_list, device_list); - device_list_handle = device_list; } else { list_add_tail(&adev->reset_list, device_list); - device_list_handle = device_list; } if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { - r = amdgpu_device_health_check(device_list_handle); + r = amdgpu_device_health_check(device_list); if (r) return r; } - /* We need to lock reset domain only once both for XGMI and single device */ - tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, - reset_list); + return 0; +} + +static void amdgpu_device_recovery_get_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); +} + +static void amdgpu_device_recovery_put_reset_lock(struct amdgpu_device *adev, + struct list_head *device_list) +{ + struct amdgpu_device *tmp_adev = NULL; + + if (list_empty(device_list)) + return; + tmp_adev = + list_first_entry(device_list, struct amdgpu_device, reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +} + +static int amdgpu_device_halt_activities( + struct amdgpu_device *adev, struct amdgpu_job *job, + struct amdgpu_reset_context *reset_context, + struct list_head *device_list, struct amdgpu_hive_info *hive, + bool need_emergency_restart) +{ + struct amdgpu_device *tmp_adev = NULL; + int i, r = 0; /* block all schedulers and reset given job's ring */ - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - + list_for_each_entry(tmp_adev, device_list, reset_list) { amdgpu_device_set_mp1_state(tmp_adev); /* @@ -6252,11 +6276,6 @@ static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, amdgpu_ras_set_error_query_ready(tmp_adev, true); } - - tmp_adev = list_first_entry(device_list, struct amdgpu_device, - reset_list); - amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); - } @@ -6324,10 +6343,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, reset_context->hive = hive; INIT_LIST_HEAD(&device_list); + if (amdgpu_device_recovery_prepare(adev, &device_list, hive)) + goto end_reset; + + /* We need to lock reset domain only once both for XGMI and single device */ + amdgpu_device_recovery_get_reset_lock(adev, &device_list); + r = amdgpu_device_halt_activities(adev, job, reset_context, &device_list, hive, need_emergency_restart); if (r) - goto end_reset; + goto reset_unlock; if (need_emergency_restart) goto skip_sched_resume; @@ -6345,13 +6370,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, r = amdgpu_device_asic_reset(adev, &device_list, reset_context); if (r) - goto end_reset; + goto reset_unlock; skip_hw_reset: r = amdgpu_device_sched_resume(&device_list, reset_context, job_signaled); if (r) - goto end_reset; + goto reset_unlock; skip_sched_resume: amdgpu_device_gpu_resume(adev, &device_list, need_emergency_restart); +reset_unlock: + amdgpu_device_recovery_put_reset_lock(adev, &device_list); end_reset: if (hive) { mutex_unlock(&hive->hive_lock); @@ -6763,6 +6790,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta memset(&reset_context, 0, sizeof(reset_context)); INIT_LIST_HEAD(&device_list); + amdgpu_device_recovery_prepare(adev, &device_list, hive); + amdgpu_device_recovery_get_reset_lock(adev, &device_list); r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, hive, false); if (hive) { @@ -6880,8 +6909,8 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) if (hive) { list_for_each_entry(tmp_adev, &device_list, reset_list) amdgpu_device_unset_mp1_state(tmp_adev); - amdgpu_device_unlock_reset_domain(adev->reset_domain); } + amdgpu_device_recovery_put_reset_lock(adev, &device_list); } if (hive) { @@ -6927,6 +6956,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) amdgpu_device_sched_resume(&device_list, NULL, NULL); amdgpu_device_gpu_resume(adev, &device_list, false); + amdgpu_device_recovery_put_reset_lock(adev, &device_list); adev->pcie_reset_ctx.occurs_dpc = false; if (hive) { From 7f3b16f3f229e37cc3e02e9e4e7106c523b119e9 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Mon, 16 Jun 2025 19:21:41 +0800 Subject: [PATCH 428/497] drm/amdgpu: Fix SDMA UTC_L1 handling during start/stop sequences This commit makes two key fixes to SDMA v4.4.2 handling: 1. disable UTC_L1 in sdma_cntl register when stopping SDMA engines by reading the current value before modifying UTC_L1_ENABLE bit. 2. Ensure UTC_L1_ENABLE is consistently managed by: - Adding the missing register write when enabling UTC_L1 during start - Keeping UTC_L1 enabled by default as per hardware requirements v2: Correct SDMA_CNTL setting (Philip) Suggested-by: Jonathan Kim Signed-off-by: Jesse Zhang Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 375bf564654e85a7b1b0657b191645b3edca1bda) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 3de125062ee3..cef68df4c663 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -490,7 +490,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; - u32 rb_cntl, ib_cntl; + u32 rb_cntl, ib_cntl, sdma_cntl; int i; for_each_inst(i, inst_mask) { @@ -502,6 +502,9 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + sdma_cntl = RREG32_SDMA(i, regSDMA_CNTL); + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, UTC_L1_ENABLE, 0); + WREG32_SDMA(i, regSDMA_CNTL, sdma_cntl); if (sdma[i]->use_doorbell) { doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); @@ -995,6 +998,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { /* enable context empty interrupt during initialization */ From ebe43542702c3d15d1a1d95e8e13b1b54076f05a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Jun 2025 11:31:52 -0400 Subject: [PATCH 429/497] drm/amdgpu: switch job hw_fence to amdgpu_fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the amdgpu fence container so we can store additional data in the fence. This also fixes the start_time handling for MCBP since we were casting the fence to an amdgpu_fence and it wasn't. Fixes: 3f4c175d62d8 ("drm/amdgpu: MCBP based on DRM scheduler (v9)") Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit bf1cd14f9e2e1fdf981eed273ddd595863f5288c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 30 +++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 12 ++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 +++++++++++ 6 files changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8e626f50b362..f81608330a3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1902,7 +1902,7 @@ static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring) continue; } job = to_amdgpu_job(s_job); - if (preempted && (&job->hw_fence) == fence) + if (preempted && (&job->hw_fence.base) == fence) /* mark the job as preempted */ job->preemption_status |= AMDGPU_IB_PREEMPTED; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 500bf85b3ee8..78f8755996f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -6362,7 +6362,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * * job->base holds a reference to parent fence */ - if (job && dma_fence_is_signaled(&job->hw_fence)) { + if (job && dma_fence_is_signaled(&job->hw_fence.base)) { job_signaled = true; dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); goto skip_hw_reset; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 8cecf25996ed..5fec808d7f54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -41,22 +41,6 @@ #include "amdgpu_trace.h" #include "amdgpu_reset.h" -/* - * Fences mark an event in the GPUs pipeline and are used - * for GPU/CPU synchronization. When the fence is written, - * it is expected that all buffers associated with that fence - * are no longer in use by the associated ring on the GPU and - * that the relevant GPU caches have been flushed. - */ - -struct amdgpu_fence { - struct dma_fence base; - - /* RB, DMA, etc. */ - struct amdgpu_ring *ring; - ktime_t start_timestamp; -}; - static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) @@ -151,12 +135,12 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd am_fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_ATOMIC); if (am_fence == NULL) return -ENOMEM; - fence = &am_fence->base; - am_fence->ring = ring; } else { /* take use of job-embedded fence */ - fence = &job->hw_fence; + am_fence = &job->hw_fence; } + fence = &am_fence->base; + am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; if (job && job->job_run_counter) { @@ -718,7 +702,7 @@ void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) * it right here or we won't be able to track them in fence_drv * and they will remain unsignaled during sa_bo free. */ - job = container_of(old, struct amdgpu_job, hw_fence); + job = container_of(old, struct amdgpu_job, hw_fence.base); if (!job->base.s_fence && !dma_fence_is_signaled(old)) dma_fence_signal(old); RCU_INIT_POINTER(*ptr, NULL); @@ -780,7 +764,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); return (const char *)to_amdgpu_ring(job->base.sched)->name; } @@ -810,7 +794,7 @@ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) */ static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence.base); if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); @@ -845,7 +829,7 @@ static void amdgpu_job_fence_free(struct rcu_head *rcu) struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); /* free job if fence has a parent job */ - kfree(container_of(f, struct amdgpu_job, hw_fence)); + kfree(container_of(f, struct amdgpu_job, hw_fence.base)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index acb21fc8b3ce..ddb9d3269357 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -272,8 +272,8 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) /* Check if any fences where initialized */ if (job->base.s_fence && job->base.s_fence->finished.ops) f = &job->base.s_fence->finished; - else if (job->hw_fence.ops) - f = &job->hw_fence; + else if (job->hw_fence.base.ops) + f = &job->hw_fence.base; else f = NULL; @@ -290,10 +290,10 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) amdgpu_sync_free(&job->explicit_sync); /* only put the hw fence if has embedded fence */ - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } void amdgpu_job_set_gang_leader(struct amdgpu_job *job, @@ -322,10 +322,10 @@ void amdgpu_job_free(struct amdgpu_job *job) if (job->gang_submit != &job->base.s_fence->scheduled) dma_fence_put(job->gang_submit); - if (!job->hw_fence.ops) + if (!job->hw_fence.base.ops) kfree(job); else - dma_fence_put(&job->hw_fence); + dma_fence_put(&job->hw_fence.base); } struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index f2c049129661..931fed8892cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -48,7 +48,7 @@ struct amdgpu_job { struct drm_sched_job base; struct amdgpu_vm *vm; struct amdgpu_sync explicit_sync; - struct dma_fence hw_fence; + struct amdgpu_fence hw_fence; struct dma_fence *gang_submit; uint32_t preamble_status; uint32_t preemption_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b95b47110769..e1f25218943a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -127,6 +127,22 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +/* + * Fences mark an event in the GPUs pipeline and are used + * for GPU/CPU synchronization. When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. + */ + +struct amdgpu_fence { + struct dma_fence base; + + /* RB, DMA, etc. */ + struct amdgpu_ring *ring; + ktime_t start_timestamp; +}; + extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); From 49cc5beeabd5b9b6a6660be8ea2e95de30ec0a07 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 15:43:55 -0400 Subject: [PATCH 430/497] drm/amdgpu/sdma5: init engine reset mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Missing the mutex init. Fixes: e56d4bf57fab ("drm/amdgpu/: drm/amdgpu: Register the new sdma function pointers for sdma_v5_0") Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 3f4caf092f02f0de169c6122639af481c7edc8f9) --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 9505ae96fbec..1813c3ed0aa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1399,6 +1399,7 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_0_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; From cfb05257ae168a0496c7637e1d9e3ab8a25cbffe Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 11 Jun 2025 09:52:14 -0500 Subject: [PATCH 431/497] drm/amdkfd: Fix race in GWS queue scheduling q->gws is not updated atomically with qpd->mapped_gws_queue. If a runlist is created between pqm_set_gws and update_queue it will contain a queue which uses GWS in a process with no GWS allocated. This will result in a scheduler hang. Use q->properties.is_gws which is changed while holding the DQM lock. Signed-off-by: Jay Cornwall Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher (cherry picked from commit b98370220eb3110e82248e3354e16a489a492cfb) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 8fa6489b6f5d..505036968a77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -240,7 +240,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.gws_control_queue = q->gws ? 1 : 0; + packet->bitfields2.gws_control_queue = q->properties.is_gws ? 1 : 0; packet->bitfields2.extended_engine_sel = extended_engine_sel__mes_map_queues__legacy_engine_sel; packet->bitfields2.queue_type = From fe79ef3530d3c681ef2d5644a9d1d1a67b21426a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 15:47:13 -0400 Subject: [PATCH 432/497] drm/amdgpu/sdma5.2: init engine reset mutex Missing the mutex init. Fixes: 47454f2dc0bf ("drm/amdgpu: Register the new sdma function pointers for sdma_v5_2") Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit ea685ff30a51a25dd9be90786933ada49a088f65) --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index a6e612b4a892..23f97da62808 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1318,6 +1318,7 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); adev->sdma.instance[i].funcs = &sdma_v5_2_sdma_funcs; ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; From 88efa0de3285be66969b71ec137d9dab1ee19e52 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 19 Jun 2025 00:23:06 +0800 Subject: [PATCH 433/497] EDAC/igen6: Fix NULL pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A kernel panic was reported with the following kernel log: EDAC igen6: Expected 2 mcs, but only 1 detected. BUG: unable to handle page fault for address: 000000000000d570 ... Hardware name: Notebook V54x_6x_TU/V54x_6x_TU, BIOS Dasharo (coreboot+UEFI) v0.9.0 07/17/2024 RIP: e030:ecclog_handler+0x7e/0xf0 [igen6_edac] ... igen6_probe+0x2a0/0x343 [igen6_edac] ... igen6_init+0xc5/0xff0 [igen6_edac] ... This issue occurred because one memory controller was disabled by the BIOS but the igen6_edac driver still checked all the memory controllers, including this absent one, to identify the source of the error. Accessing the null MMIO for the absent memory controller resulted in the oops above. Fix this issue by reverting the configuration structure to non-const and updating the field 'res_cfg->num_imc' to reflect the number of detected memory controllers. Fixes: 20e190b1c1fd ("EDAC/igen6: Skip absent memory controllers") Reported-by: Marek Marczykowski-Górecki Closes: https://lore.kernel.org/all/aFFN7RlXkaK_loQb@mail-itl/ Suggested-by: Borislav Petkov Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Tested-by: Marek Marczykowski-Górecki Link: https://lore.kernel.org/r/20250618162307.1523736-1-qiuxu.zhuo@intel.com --- drivers/edac/igen6_edac.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c index 1930dc00c791..1cb5c67e78ae 100644 --- a/drivers/edac/igen6_edac.c +++ b/drivers/edac/igen6_edac.c @@ -125,7 +125,7 @@ #define MEM_SLICE_HASH_MASK(v) (GET_BITFIELD(v, 6, 19) << 6) #define MEM_SLICE_HASH_LSB_MASK_BIT(v) GET_BITFIELD(v, 24, 26) -static const struct res_config { +static struct res_config { bool machine_check; /* The number of present memory controllers. */ int num_imc; @@ -479,7 +479,7 @@ static u64 rpl_p_err_addr(u64 ecclog) return ECC_ERROR_LOG_ADDR45(ecclog); } -static const struct res_config ehl_cfg = { +static struct res_config ehl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xdc00, @@ -489,7 +489,7 @@ static const struct res_config ehl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static const struct res_config icl_cfg = { +static struct res_config icl_cfg = { .num_imc = 1, .imc_base = 0x5000, .ibecc_base = 0xd800, @@ -499,7 +499,7 @@ static const struct res_config icl_cfg = { .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr, }; -static const struct res_config tgl_cfg = { +static struct res_config tgl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0x5000, @@ -513,7 +513,7 @@ static const struct res_config tgl_cfg = { .err_addr_to_imc_addr = tgl_err_addr_to_imc_addr, }; -static const struct res_config adl_cfg = { +static struct res_config adl_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -524,7 +524,7 @@ static const struct res_config adl_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config adl_n_cfg = { +static struct res_config adl_n_cfg = { .machine_check = true, .num_imc = 1, .imc_base = 0xd800, @@ -535,7 +535,7 @@ static const struct res_config adl_n_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config rpl_p_cfg = { +static struct res_config rpl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -547,7 +547,7 @@ static const struct res_config rpl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config mtl_ps_cfg = { +static struct res_config mtl_ps_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -558,7 +558,7 @@ static const struct res_config mtl_ps_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct res_config mtl_p_cfg = { +static struct res_config mtl_p_cfg = { .machine_check = true, .num_imc = 2, .imc_base = 0xd800, @@ -569,7 +569,7 @@ static const struct res_config mtl_p_cfg = { .err_addr_to_imc_addr = adl_err_addr_to_imc_addr, }; -static const struct pci_device_id igen6_pci_tbl[] = { +static struct pci_device_id igen6_pci_tbl[] = { { PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg }, { PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg }, @@ -1350,9 +1350,11 @@ static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar) return -ENODEV; } - if (lmc < res_cfg->num_imc) + if (lmc < res_cfg->num_imc) { igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.", res_cfg->num_imc, lmc); + res_cfg->num_imc = lmc; + } return 0; From 306cb65bb0cb243389fcbd0a66907d5bdea07d1e Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Mon, 17 Mar 2025 15:57:27 +0530 Subject: [PATCH 434/497] smb: fix secondary channel creation issue with kerberos by populating hostname when adding channels When mounting a share with kerberos authentication with multichannel support, share mounts correctly, but fails to create secondary channels. This occurs because the hostname is not populated when adding the channels. The hostname is necessary for the userspace cifs.upcall program to retrieve the required credentials and pass it back to kernel, without hostname secondary channels fails establish. Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Bharath SM Reported-by: xfuren Link: https://bugzilla.samba.org/show_bug.cgi?id=15824 Signed-off-by: Steve French --- fs/smb/client/sess.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index ec0db32c7d98..330bc3d25bad 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -498,8 +498,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, ctx->domainauto = ses->domainAuto; ctx->domainname = ses->domainName; - /* no hostname for extra channels */ - ctx->server_hostname = ""; + ctx->server_hostname = ses->server->hostname; ctx->username = ses->user_name; ctx->password = ses->password; From 840738eae94864993a735ab677b9795bb8f3b961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Sun, 8 Jun 2025 16:10:33 +0200 Subject: [PATCH 435/497] cifs: Remove duplicate fattr->cf_dtype assignment from wsl_to_fattr() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 8bd25b61c5a5 ("smb: client: set correct d_type for reparse DFS/DFSR and mount point") deduplicated assignment of fattr->cf_dtype member from all places to end of the function cifs_reparse_point_to_fattr(). The only one missing place which was not deduplicated is wsl_to_fattr(). Fix it. Fixes: 8bd25b61c5a5 ("smb: client: set correct d_type for reparse DFS/DFSR and mount point") Signed-off-by: Pali Rohár Signed-off-by: Steve French --- fs/smb/client/reparse.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index bb25e77c5540..511611206dab 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -1172,7 +1172,6 @@ static bool wsl_to_fattr(struct cifs_open_info_data *data, if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK)) return false; - fattr->cf_dtype = S_DT(fattr->cf_mode); return true; } From 6fcab2791543924d438e7fa49276d0998b0a069f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 18 Jun 2025 14:17:45 +0200 Subject: [PATCH 436/497] ACPICA: Refuse to evaluate a method if arguments are missing As reported in [1], a platform firmware update that increased the number of method parameters and forgot to update a least one of its callers, caused ACPICA to crash due to use-after-free. Since this a result of a clear AML issue that arguably cannot be fixed up by the interpreter (it cannot produce missing data out of thin air), address it by making ACPICA refuse to evaluate a method if the caller attempts to pass fewer arguments than expected to it. Closes: https://github.com/acpica/acpica/issues/1027 [1] Reported-by: Peter Williams Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede Tested-by: Hans de Goede # Dell XPS 9640 with BIOS 1.12.0 Link: https://patch.msgid.link/5909446.DvuYhMxLoT@rjwysocki.net Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dsmethod.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index c8f37f4e6626..fef6fb29ece4 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -483,6 +483,13 @@ acpi_ds_call_control_method(struct acpi_thread_state *thread, return_ACPI_STATUS(AE_NULL_OBJECT); } + if (this_walk_state->num_operands < obj_desc->method.param_count) { + ACPI_ERROR((AE_INFO, "Missing argument for method [%4.4s]", + acpi_ut_get_node_name(method_node))); + + return_ACPI_STATUS(AE_AML_UNINITIALIZED_ARG); + } + /* Init for new method, possibly wait on method mutex */ status = From 37fb58a7273726e59f9429c89ade5116083a213d Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Wed, 18 Jun 2025 07:32:17 +0000 Subject: [PATCH 437/497] cgroup,freezer: fix incomplete freezing when attaching tasks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An issue was found: # cd /sys/fs/cgroup/freezer/ # mkdir test # echo FROZEN > test/freezer.state # cat test/freezer.state FROZEN # sleep 1000 & [1] 863 # echo 863 > test/cgroup.procs # cat test/freezer.state FREEZING When tasks are migrated to a frozen cgroup, the freezer fails to immediately freeze the tasks, causing the cgroup to remain in the "FREEZING". The freeze_task() function is called before clearing the CGROUP_FROZEN flag. This causes the freezing() check to incorrectly return false, preventing __freeze_task() from being invoked for the migrated task. To fix this issue, clear the CGROUP_FROZEN state before calling freeze_task(). Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Cc: stable@vger.kernel.org # v6.1+ Reported-by: Zhong Jiawei Signed-off-by: Chen Ridong Acked-by: Michal Koutný Signed-off-by: Tejun Heo --- kernel/cgroup/legacy_freezer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index 039d1eb2f215..507b8f19a262 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -188,13 +188,12 @@ static void freezer_attach(struct cgroup_taskset *tset) if (!(freezer->state & CGROUP_FREEZING)) { __thaw_task(task); } else { - freeze_task(task); - /* clear FROZEN and propagate upwards */ while (freezer && (freezer->state & CGROUP_FROZEN)) { freezer->state &= ~CGROUP_FROZEN; freezer = parent_freezer(freezer); } + freeze_task(task); } } From c6d732c38f93c4aebd204a5656583142289c3a2e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Jun 2025 13:22:40 -0700 Subject: [PATCH 438/497] net: ethtool: remove duplicate defines for family info Commit under fixes switched to uAPI generation from the YAML spec. A number of custom defines were left behind, mostly for commands very hard to express in YAML spec. Among what was left behind was the name and version of the generic netlink family. Problem is that the codegen always outputs those values so we ended up with a duplicated, differently named set of defines. Provide naming info in YAML and remove the incorrect defines. Fixes: 8d0580c6ebdd ("ethtool: regenerate uapi header from the spec") Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250617202240.811179-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 3 +++ include/uapi/linux/ethtool_netlink.h | 4 ---- include/uapi/linux/ethtool_netlink_generated.h | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 9f98715a6512..72a076b0e1b5 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -7,6 +7,9 @@ protocol: genetlink-legacy doc: Partial family for Ethtool Netlink. uapi-header: linux/ethtool_netlink_generated.h +c-family-name: ethtool-genl-name +c-version-name: ethtool-genl-version + definitions: - name: udp-tunnel-type diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 9ff72cfb2e98..09a75bdb6560 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -208,10 +208,6 @@ enum { ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1) }; -/* generic netlink info */ -#define ETHTOOL_GENL_NAME "ethtool" -#define ETHTOOL_GENL_VERSION 1 - #define ETHTOOL_MCGRP_MONITOR_NAME "monitor" #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */ diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 9a02f579de22..aa8ab5227c1e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -6,8 +6,8 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -#define ETHTOOL_FAMILY_NAME "ethtool" -#define ETHTOOL_FAMILY_VERSION 1 +#define ETHTOOL_GENL_NAME "ethtool" +#define ETHTOOL_GENL_VERSION 1 enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, From ae409629e022fbebbc6d31a1bfeccdbbeee20fd6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 17 Jun 2025 20:20:17 +0200 Subject: [PATCH 439/497] net: ftgmac100: select FIXED_PHY Depending on e.g. DT configuration this driver uses a fixed link. So we shouldn't rely on the user to enable FIXED_PHY, select it in Kconfig instead. We may end up with a non-functional driver otherwise. Fixes: 38561ded50d0 ("net: ftgmac100: support fixed link") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/476bb33b-5584-40f0-826a-7294980f2895@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/faraday/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/faraday/Kconfig b/drivers/net/ethernet/faraday/Kconfig index c699bd6bcbb9..474073c7f94d 100644 --- a/drivers/net/ethernet/faraday/Kconfig +++ b/drivers/net/ethernet/faraday/Kconfig @@ -31,6 +31,7 @@ config FTGMAC100 depends on ARM || COMPILE_TEST depends on !64BIT || BROKEN select PHYLIB + select FIXED_PHY select MDIO_ASPEED if MACH_ASPEED_G6 select CRC32 help From 9ac8d0c640a161486eaf71d1999ee990fad62947 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 17 Jun 2025 12:04:02 +0530 Subject: [PATCH 440/497] Octeontx2-pf: Fix Backpresure configuration NIX block can receive packets from multiple links such as MAC (RPM), LBK and CPT. ----------------- RPM --| NIX | ----------------- | | LBK Each link supports multiple channels for example RPM link supports 16 channels. In case of link oversubsribe, NIX will assert backpressure on receive channels. The previous patch considered a single channel per link, resulting in backpressure not being enabled on the remaining channels Fixes: a7ef63dbd588 ("octeontx2-af: Disable backpressure between CPT and NIX") Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250617063403.3582210-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 6f572589f1e5..6b5c9536d26d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1822,7 +1822,7 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) req->chan_cnt = IEEE_8021QAZ_MAX_TCS; req->bpid_per_chan = 1; } else { - req->chan_cnt = 1; + req->chan_cnt = pfvf->hw.rx_chan_cnt; req->bpid_per_chan = 0; } @@ -1847,7 +1847,7 @@ int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable) req->chan_cnt = IEEE_8021QAZ_MAX_TCS; req->bpid_per_chan = 1; } else { - req->chan_cnt = 1; + req->chan_cnt = pfvf->hw.rx_chan_cnt; req->bpid_per_chan = 0; } From f82727adcf2992822e12198792af450a76ebd5ef Mon Sep 17 00:00:00 2001 From: Haixia Qu Date: Tue, 17 Jun 2025 05:56:24 +0000 Subject: [PATCH 441/497] tipc: fix null-ptr-deref when acquiring remote ip of ethernet bearer The reproduction steps: 1. create a tun interface 2. enable l2 bearer 3. TIPC_NL_UDP_GET_REMOTEIP with media name set to tun tipc: Started in network mode tipc: Node identity 8af312d38a21, cluster identity 4711 tipc: Enabled bearer , priority 1 Oops: general protection fault KASAN: null-ptr-deref in range CPU: 1 UID: 1000 PID: 559 Comm: poc Not tainted 6.16.0-rc1+ #117 PREEMPT Hardware name: QEMU Ubuntu 24.04 PC RIP: 0010:tipc_udp_nl_dump_remoteip+0x4a4/0x8f0 the ub was in fact a struct dev. when bid != 0 && skip_cnt != 0, bearer_list[bid] may be NULL or other media when other thread changes it. fix this by checking media_id. Fixes: 832629ca5c313 ("tipc: add UDP remoteip dump to netlink API") Signed-off-by: Haixia Qu Reviewed-by: Tung Nguyen Link: https://patch.msgid.link/20250617055624.2680-1-hxqu@hillstonenet.com Signed-off-by: Jakub Kicinski --- net/tipc/udp_media.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 108a4cc2e001..258d6aa4f21a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -489,7 +489,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = tipc_bearer_find(net, bname); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } @@ -500,7 +500,7 @@ int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); b = rtnl_dereference(tn->bearer_list[bid]); - if (!b) { + if (!b || b->bcast_addr.media_id != TIPC_MEDIA_TYPE_UDP) { rtnl_unlock(); return -EINVAL; } From 3168276591210d147ed9e6dc267f8a04007593c7 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 17 Jun 2025 14:20:59 -0700 Subject: [PATCH 442/497] selftests: netdevsim: improve lib.sh include in peer.sh Fix the peer.sh test to run from INSTALL_PATH. Signed-off-by: David Wei Link: https://patch.msgid.link/20250617212102.175711-2-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/peer.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index 1bb46ec435d4..7f32b5600925 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -source ../../../net/lib.sh +lib_dir=$(dirname $0)/../../../net +source $lib_dir/lib.sh NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID From c65b5bb2329e36340eba76f05752f8f1eb15bee0 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 17 Jun 2025 14:21:00 -0700 Subject: [PATCH 443/497] selftests: net: add passive TFO test binary Add a simple passive TFO server and client test binary. This will be used to test the SO_INCOMING_NAPI_ID of passive TFO accepted sockets. Signed-off-by: David Wei Link: https://patch.msgid.link/20250617212102.175711-3-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/.gitignore | 1 + tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/tfo.c | 171 +++++++++++++++++++++++++ 3 files changed, 173 insertions(+) create mode 100644 tools/testing/selftests/net/tfo.c diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 532bb732bc6d..c6dd2a335cf4 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -50,6 +50,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tfo timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ab996bd22a5f..ab8da438fd78 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -110,6 +110,7 @@ TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off +TEST_GEN_FILES += tfo # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c new file mode 100644 index 000000000000..eb3cac5e583c --- /dev/null +++ b/tools/testing/selftests/net/tfo.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static struct sockaddr_in6 cfg_addr; +static char *cfg_outfile; + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static void run_server(void) +{ + unsigned long qlen = 32; + int fd, opt, connfd; + socklen_t len; + char buf[64]; + FILE *outfile; + + outfile = fopen(cfg_outfile, "w"); + if (!outfile) + error(1, errno, "fopen() outfile"); + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + opt = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0) + error(1, errno, "setsockopt(TCP_FASTOPEN)"); + + if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0) + error(1, errno, "bind()"); + + if (listen(fd, 5) < 0) + error(1, errno, "listen()"); + + len = sizeof(cfg_addr); + connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len); + if (connfd < 0) + error(1, errno, "accept()"); + + len = sizeof(opt); + if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) + error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); + + read(connfd, buf, 64); + fprintf(outfile, "%d\n", opt); + + fclose(outfile); + close(connfd); + close(fd); +} + +static void run_client(void) +{ + int fd; + char *msg = "Hello, world!"; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-s|-c) -h -p -o ", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "sch:p:o:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Server cannot have -h specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "Client address parse error: %s", addr); + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} From 137e7b5cceda2fd634ff47fde6c4226092d09442 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 17 Jun 2025 14:21:01 -0700 Subject: [PATCH 444/497] selftests: net: add test for passive TFO socket NAPI ID Add a test that checks that the NAPI ID of a passive TFO socket is valid i.e. not zero. Signed-off-by: David Wei Link: https://patch.msgid.link/20250617212102.175711-4-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/tfo_passive.sh | 112 +++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100755 tools/testing/selftests/net/tfo_passive.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ab8da438fd78..332f387615d7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -111,6 +111,7 @@ TEST_PROGS += lwt_dst_cache_ref_loop.sh TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off TEST_GEN_FILES += tfo +TEST_PROGS += tfo_passive.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh new file mode 100755 index 000000000000..80bf11fdc046 --- /dev/null +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + # Enable passive TFO + ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD} \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +out_file=$(mktemp) + +timeout -k 1s 30s ip netns exec nssv ./tfo \ + -s \ + -p ${SERVER_PORT} \ + -o ${out_file}& + +wait_local_port_listen nssv ${SERVER_PORT} tcp + +ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT} + +wait + +res=$(cat $out_file) +rm $out_file + +if [ $res -eq 0 ]; then + echo "got invalid NAPI ID from passive TFO socket" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 From dbe0ca8da1f62b6252e7be6337209f4d86d4a914 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 17 Jun 2025 14:21:02 -0700 Subject: [PATCH 445/497] tcp: fix passive TFO socket having invalid NAPI ID There is a bug with passive TFO sockets returning an invalid NAPI ID 0 from SO_INCOMING_NAPI_ID. Normally this is not an issue, but zero copy receive relies on a correct NAPI ID to process sockets on the right queue. Fix by adding a sk_mark_napi_id_set(). Fixes: e5907459ce7e ("tcp: Record Rx hash and NAPI ID in tcp_child_process") Signed-off-by: David Wei Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250617212102.175711-5-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_fastopen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 9b83d639b5ac..5107121c5e37 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -3,6 +3,7 @@ #include #include #include +#include void tcp_fastopen_init_key_once(struct net *net) { @@ -279,6 +280,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, refcount_set(&req->rsk_refcnt, 2); + sk_mark_napi_id_set(child, skb); + /* Now finish processing the fastopen child socket. */ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, skb); From 7869738b6908eec7755818aaf6f3aa068b2f0e1b Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 19 Jun 2025 11:28:39 +0800 Subject: [PATCH 446/497] erofs: refuse crafted out-of-file-range encoded extents Crafted encoded extents could record out-of-range `lstart`, which should not happen in normal cases. It caused an iomap_iter_done() complaint [1] reported by syzbot. [1] https://lore.kernel.org/r/684cb499.a00a0220.c6bd7.0010.GAE@google.com Fixes: 1d191b4ca51d ("erofs: implement encoded extent metadata") Reported-and-tested-by: syzbot+d8f000c609f05f52d9b5@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d8f000c609f05f52d9b5 Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250619032839.2642193-1-hsiangkao@linux.alibaba.com --- fs/erofs/zmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 14ea47f954f5..6afcb054780d 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -597,6 +597,10 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (la > map->m_la) { r = mid; + if (la > lend) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } lend = la; } else { l = mid + 1; From 8a8ff069c7ad9a359c54683329883e2432cff191 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 15 Jun 2025 16:11:38 +0100 Subject: [PATCH 447/497] KVM: arm64: nv: Fix tracking of shadow list registers Wei-Lin reports that the tracking of shadow list registers is majorly broken when resync'ing the L2 state after a run, as we confuse the guest's LR index with the host's, potentially losing the interrupt state. While this could be fixed by adding yet another side index to track it (Wei-Lin's fix), it may be better to refactor this code to avoid having a side index altogether, limiting the risk to introduce this class of bugs. A key observation is that the shadow index is always the number of bits in the lr_map bitmap. With that, the parallel indexing scheme can be completely dropped. While doing this, introduce a couple of helpers that abstract the index conversion and some of the LR repainting, making the whole exercise much simpler. Reported-by: Wei-Lin Chang Reviewed-by: Wei-Lin Chang Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20250614145721.2504524-1-r09922117@csie.ntu.edu.tw Link: https://lore.kernel.org/r/86qzzkc5xa.wl-maz@kernel.org --- arch/arm64/kvm/vgic/vgic-v3-nested.c | 81 ++++++++++++++-------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c index d22a8ad7bcc5..a50fb7e6841f 100644 --- a/arch/arm64/kvm/vgic/vgic-v3-nested.c +++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c @@ -36,6 +36,11 @@ struct shadow_if { static DEFINE_PER_CPU(struct shadow_if, shadow_if); +static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx) +{ + return hweight16(shadow_if->lr_map & (BIT(idx) - 1)); +} + /* * Nesting GICv3 support * @@ -209,6 +214,29 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) return reg; } +static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr) +{ + struct vgic_irq *irq; + + if (!(lr & ICH_LR_HW)) + return lr; + + /* We have the HW bit set, check for validity of pINTID */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + /* If there was no real mapping, nuke the HW bit */ + if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI) + lr &= ~ICH_LR_HW; + + /* Translate the virtual mapping to the real one, even if invalid */ + if (irq) { + lr &= ~ICH_LR_PHYS_ID_MASK; + lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + vgic_put_irq(vcpu->kvm, irq); + } + + return lr; +} + /* * For LRs which have HW bit set such as timer interrupts, we modify them to * have the host hardware interrupt number instead of the virtual one programmed @@ -217,58 +245,37 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu, struct vgic_v3_cpu_if *s_cpu_if) { - unsigned long lr_map = 0; - int index = 0; + struct shadow_if *shadow_if; + + shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif); + shadow_if->lr_map = 0; for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); - struct vgic_irq *irq; if (!(lr & ICH_LR_STATE)) - lr = 0; + continue; - if (!(lr & ICH_LR_HW)) - goto next; + lr = translate_lr_pintid(vcpu, lr); - /* We have the HW bit set, check for validity of pINTID */ - irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); - if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) { - /* There was no real mapping, so nuke the HW bit */ - lr &= ~ICH_LR_HW; - if (irq) - vgic_put_irq(vcpu->kvm, irq); - goto next; - } - - /* Translate the virtual mapping to the real one */ - lr &= ~ICH_LR_PHYS_ID_MASK; - lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); - - vgic_put_irq(vcpu->kvm, irq); - -next: - s_cpu_if->vgic_lr[index] = lr; - if (lr) { - lr_map |= BIT(i); - index++; - } + s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr; + shadow_if->lr_map |= BIT(i); } - container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map; - s_cpu_if->used_lrs = index; + s_cpu_if->used_lrs = hweight16(shadow_if->lr_map); } void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) { struct shadow_if *shadow_if = get_shadow_if(); - int i, index = 0; + int i; for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); struct vgic_irq *irq; if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) - goto next; + continue; /* * If we had a HW lr programmed by the guest hypervisor, we @@ -277,15 +284,13 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) */ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ - goto next; + continue; - lr = __gic_v3_get_lr(index); + lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i)); if (!(lr & ICH_LR_STATE)) irq->active = false; vgic_put_irq(vcpu->kvm, irq); - next: - index++; } } @@ -368,13 +373,11 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu) val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); val &= ~ICH_LR_STATE; - val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE; + val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE; __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val); - s_cpu_if->vgic_lr[i] = 0; } - shadow_if->lr_map = 0; vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; } From 1fbe6861a6d9a942fb8ab8677ddf1ecb86b1af60 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Jun 2025 15:45:04 -0700 Subject: [PATCH 448/497] KVM: arm64: Explicitly treat routing entry type changes as changes Explicitly treat type differences as GSI routing changes, as comparing MSI data between two entries could get a false negative, e.g. if userspace changed the type but left the type-specific data as- Note, the same bug was fixed in x86 by commit bcda70c56f3e ("KVM: x86: Explicitly treat routing entry type changes as changes"). Fixes: 4bf3693d36af ("KVM: arm64: Unmap vLPIs affected by changes to GSI routing information") Signed-off-by: Sean Christopherson Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250611224604.313496-3-seanjc@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index de2b4e9c9f9f..38a91bb5d4c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2764,7 +2764,8 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return memcmp(&old->msi, &new->msi, sizeof(new->msi)); From 56a14984505b11674df5e01407748236bc4bc8f8 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sun, 8 Jun 2025 17:54:02 +0800 Subject: [PATCH 449/497] KVM: arm64: selftests: Close the GIC FD in arch_timer_edge_cases Close the GIC FD to free the reference it holds to the VM so that we can correctly clean up the VM. This also gets rid of the "KVM: debugfs: duplicate directory 395722-4" warning when running arch_timer_edge_cases. Signed-off-by: Zenghui Yu Reviewed-by: Miguel Luis Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20250608095402.1131-1-yuzenghui@huawei.com Signed-off-by: Marc Zyngier --- .../selftests/kvm/arm64/arch_timer_edge_cases.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index b4d22b3ab7cc..4e71740a098b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -954,6 +954,8 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, enum arch_timer timer) { @@ -968,12 +970,20 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - vgic_v3_setup(*vm, 1, 64); + gic_fd = vgic_v3_setup(*vm, 1, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); + sync_global_to_guest(*vm, test_args); sync_global_to_guest(*vm, CVAL_MAX); sync_global_to_guest(*vm, DEF_CNT); } +static void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); +} + static void test_print_help(char *name) { pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" @@ -1060,13 +1070,13 @@ int main(int argc, char *argv[]) if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } if (test_args.test_physical) { test_vm_create(&vm, &vcpu, PHYSICAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } return 0; From 5bd1bafd4474ee26f504b41aba11f3e2a1175b88 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Jun 2025 12:55:10 -0700 Subject: [PATCH 450/497] eth: fbnic: avoid double free when failing to DMA-map FW msg The semantics are that caller of fbnic_mbx_map_msg() retains the ownership of the message on error. All existing callers dutifully free the page. Fixes: da3cde08209e ("eth: fbnic: Add FW communication mechanism") Reviewed-by: Alexander Duyck Signed-off-by: Jakub Kicinski Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250616195510.225819-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index e2368075ab8c..4521d0483d18 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -127,11 +127,8 @@ static int fbnic_mbx_map_msg(struct fbnic_dev *fbd, int mbx_idx, return -EBUSY; addr = dma_map_single(fbd->dev, msg, PAGE_SIZE, direction); - if (dma_mapping_error(fbd->dev, addr)) { - free_page((unsigned long)msg); - + if (dma_mapping_error(fbd->dev, addr)) return -ENOSPC; - } mbx->buf_info[tail].msg = msg; mbx->buf_info[tail].addr = addr; From cade3d57e456e69f67aa9894bf89dc8678796bb7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jun 2025 14:37:12 +0100 Subject: [PATCH 451/497] KVM: arm64: VHE: Synchronize restore of host debug registers When KVM runs in non-protected VHE mode, there's no context synchronization event between __debug_switch_to_host() restoring the host debug registers and __kvm_vcpu_run() unmasking debug exceptions. Due to this, it's theoretically possible for the host to take an unexpected debug exception due to the stale guest configuration. This cannot happen in NVHE/HVHE mode as debug exceptions are masked in the hyp code, and the exception return to the host will provide the necessary context synchronization before debug exceptions can be taken. For now, avoid the problem by adding an ISB after VHE hyp code restores the host debug registers. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Mark Brown Cc: Oliver Upton Cc: Will Deacon Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250617133718.4014181-2-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 502a5b73ee70..73881e1dc267 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -167,6 +167,9 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) __debug_save_state(guest_dbg, guest_ctxt); __debug_restore_state(host_dbg, host_ctxt); + + if (has_vhe()) + isb(); } #endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ From 257d0aa8e2502754bc758faceceb6ff59318af60 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jun 2025 14:37:13 +0100 Subject: [PATCH 452/497] KVM: arm64: VHE: Synchronize CPTR trap deactivation Currently there is no ISB between __deactivate_cptr_traps() disabling traps that affect EL2 and fpsimd_lazy_switch_to_host() manipulating registers potentially affected by CPTR traps. When NV is not in use, this is safe because the relevant registers are only accessed when guest_owns_fp_regs() && vcpu_has_sve(vcpu), and this also implies that SVE traps affecting EL2 have been deactivated prior to __guest_entry(). When NV is in use, a guest hypervisor may have configured SVE traps for a nested context, and so it is necessary to have an ISB between __deactivate_cptr_traps() and fpsimd_lazy_switch_to_host(). Due to the current lack of an ISB, when a guest hypervisor enables SVE traps in CPTR, the host can take an unexpected SVE trap from within fpsimd_lazy_switch_to_host(), e.g. | Unhandled 64-bit el1h sync exception on CPU1, ESR 0x0000000066000000 -- SVE | CPU: 1 UID: 0 PID: 164 Comm: kvm-vcpu-0 Not tainted 6.15.0-rc4-00138-ga05e0f012c05 #3 PREEMPT | Hardware name: FVP Base RevC (DT) | pstate: 604023c9 (nZCv DAIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : __kvm_vcpu_run+0x6f4/0x844 | lr : __kvm_vcpu_run+0x150/0x844 | sp : ffff800083903a60 | x29: ffff800083903a90 x28: ffff000801f4a300 x27: 0000000000000000 | x26: 0000000000000000 x25: ffff000801f90000 x24: ffff000801f900f0 | x23: ffff800081ff7720 x22: 0002433c807d623f x21: ffff000801f90000 | x20: ffff00087f730730 x19: 0000000000000000 x18: 0000000000000000 | x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 | x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 | x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 | x8 : 0000000000000000 x7 : 0000000000000000 x6 : ffff000801f90d70 | x5 : 0000000000001000 x4 : ffff8007fd739000 x3 : ffff000801f90000 | x2 : 0000000000000000 x1 : 00000000000003cc x0 : ffff800082f9d000 | Kernel panic - not syncing: Unhandled exception | CPU: 1 UID: 0 PID: 164 Comm: kvm-vcpu-0 Not tainted 6.15.0-rc4-00138-ga05e0f012c05 #3 PREEMPT | Hardware name: FVP Base RevC (DT) | Call trace: | show_stack+0x18/0x24 (C) | dump_stack_lvl+0x60/0x80 | dump_stack+0x18/0x24 | panic+0x168/0x360 | __panic_unhandled+0x68/0x74 | el1h_64_irq_handler+0x0/0x24 | el1h_64_sync+0x6c/0x70 | __kvm_vcpu_run+0x6f4/0x844 (P) | kvm_arm_vcpu_enter_exit+0x64/0xa0 | kvm_arch_vcpu_ioctl_run+0x21c/0x870 | kvm_vcpu_ioctl+0x1a8/0x9d0 | __arm64_sys_ioctl+0xb4/0xf4 | invoke_syscall+0x48/0x104 | el0_svc_common.constprop.0+0x40/0xe0 | do_el0_svc+0x1c/0x28 | el0_svc+0x30/0xcc | el0t_64_sync_handler+0x10c/0x138 | el0t_64_sync+0x198/0x19c | SMP: stopping secondary CPUs | Kernel Offset: disabled | CPU features: 0x0000,000002c0,02df4fb9,97ee773f | Memory Limit: none | ---[ end Kernel panic - not syncing: Unhandled exception ]--- Fix this by adding an ISB between __deactivate_traps() and fpsimd_lazy_switch_to_host(). Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Mark Brown Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250617133718.4014181-3-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/vhe/switch.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 09df2b42bc1b..20df44b49ceb 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -667,6 +667,9 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); + /* Ensure CPTR trap deactivation has taken effect */ + isb(); + fpsimd_lazy_switch_to_host(vcpu); sysreg_restore_host_state_vhe(host_ctxt); From e62dd507844fa47f0fdc29f3be5a90a83f297820 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jun 2025 14:37:14 +0100 Subject: [PATCH 453/497] KVM: arm64: Reorganise CPTR trap manipulation The NVHE/HVHE and VHE modes have separate implementations of __activate_cptr_traps() and __deactivate_cptr_traps() in their respective switch.c files. There's some duplication of logic, and it's not currently possible to reuse this logic elsewhere. Move the logic into the common switch.h header so that it can be reused, and de-duplicate the common logic. This rework changes the way SVE traps are deactivated in VHE mode, aligning it with NVHE/HVHE modes: * Before this patch, VHE's __deactivate_cptr_traps() would unconditionally enable SVE for host EL2 (but not EL0), regardless of whether the ARM64_SVE cpucap was set. * After this patch, VHE's __deactivate_cptr_traps() will take the ARM64_SVE cpucap into account. When ARM64_SVE is not set, SVE will be trapped from EL2 and below. The old and new behaviour are both benign: * When ARM64_SVE is not set, the host will not touch SVE state, and will not reconfigure SVE traps. Host EL0 access to SVE will be trapped as expected. * When ARM64_SVE is set, the host will configure EL0 SVE traps before returning to EL0 as part of reloading the EL0 FPSIMD/SVE/SME state. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Mark Brown Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250617133718.4014181-4-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 130 ++++++++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/switch.c | 59 ----------- arch/arm64/kvm/hyp/vhe/switch.c | 81 --------------- 3 files changed, 130 insertions(+), 140 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 76dfda116e56..8a77fcccbcf6 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -65,6 +65,136 @@ static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) } } +static inline void __activate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1 | CPTR_EL2_TAM | CPTR_EL2_TTA; + + /* + * Always trap SME since it's not supported in KVM. + * TSM is RES1 if SME isn't implemented. + */ + val |= CPTR_EL2_TSM; + + if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) + val |= CPTR_EL2_TZ; + + if (!guest_owns_fp_regs()) + val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); +} + +static inline void __activate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + /* + * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to + * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, + * except for some missing controls, such as TAM. + * In this case, CPTR_EL2.TAM has the same position with or without + * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM + * shift value for trapping the AMU accesses. + */ + u64 val = CPTR_EL2_TAM | CPACR_EL1_TTA; + u64 cptr; + + if (guest_owns_fp_regs()) { + val |= CPACR_EL1_FPEN; + if (vcpu_has_sve(vcpu)) + val |= CPACR_EL1_ZEN; + } + + if (!vcpu_has_nv(vcpu)) + goto write; + + /* + * The architecture is a bit crap (what a surprise): an EL2 guest + * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, + * as they are RES0 in the guest's view. To work around it, trap the + * sucker using the very same bit it can't set... + */ + if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) + val |= CPTR_EL2_TCPAC; + + /* + * Layer the guest hypervisor's trap configuration on top of our own if + * we're in a nested context. + */ + if (is_hyp_ctxt(vcpu)) + goto write; + + cptr = vcpu_sanitised_cptr_el2(vcpu); + + /* + * Pay attention, there's some interesting detail here. + * + * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two + * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): + * + * - CPTR_EL2.xEN = x0, traps are enabled + * - CPTR_EL2.xEN = x1, traps are disabled + * + * In other words, bit[0] determines if guest accesses trap or not. In + * the interest of simplicity, clear the entire field if the guest + * hypervisor has traps enabled to dispel any illusion of something more + * complicated taking place. + */ + if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_FPEN; + if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) + val &= ~CPACR_EL1_ZEN; + + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) + val |= cptr & CPACR_EL1_E0POE; + + val |= cptr & CPTR_EL2_TCPAC; + +write: + write_sysreg(val, cpacr_el1); +} + +static inline void __activate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + + if (has_vhe() || has_hvhe()) + __activate_cptr_traps_vhe(vcpu); + else + __activate_cptr_traps_nvhe(vcpu); +} + +static inline void __deactivate_cptr_traps_nvhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPTR_NVHE_EL2_RES1; + + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); +} + +static inline void __deactivate_cptr_traps_vhe(struct kvm_vcpu *vcpu) +{ + u64 val = CPACR_EL1_FPEN; + + if (cpus_have_final_cap(ARM64_SVE)) + val |= CPACR_EL1_ZEN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN; + + write_sysreg(val, cpacr_el1); +} + +static inline void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (has_vhe() || has_hvhe()) + __deactivate_cptr_traps_vhe(vcpu); + else + __deactivate_cptr_traps_nvhe(vcpu); +} + #define reg_to_fgt_masks(reg) \ ({ \ struct fgt_masks *m; \ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 73affe1333a4..0e752b515d0f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -47,65 +47,6 @@ struct fgt_masks hdfgwtr2_masks; extern void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ - - if (!guest_owns_fp_regs()) - __activate_traps_fpsimd32(vcpu); - - if (has_hvhe()) { - val |= CPACR_EL1_TTA; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } - - write_sysreg(val, cpacr_el1); - } else { - val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; - - /* - * Always trap SME since it's not supported in KVM. - * TSM is RES1 if SME isn't implemented. - */ - val |= CPTR_EL2_TSM; - - if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs()) - val |= CPTR_EL2_TZ; - - if (!guest_owns_fp_regs()) - val |= CPTR_EL2_TFP; - - write_sysreg(val, cptr_el2); - } -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - if (has_hvhe()) { - u64 val = CPACR_EL1_FPEN; - - if (cpus_have_final_cap(ARM64_SVE)) - val |= CPACR_EL1_ZEN; - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN; - - write_sysreg(val, cpacr_el1); - } else { - u64 val = CPTR_NVHE_EL2_RES1; - - if (!cpus_have_final_cap(ARM64_SVE)) - val |= CPTR_EL2_TZ; - if (!cpus_have_final_cap(ARM64_SME)) - val |= CPTR_EL2_TSM; - - write_sysreg(val, cptr_el2); - } -} - static void __activate_traps(struct kvm_vcpu *vcpu) { ___activate_traps(vcpu, vcpu->arch.hcr_el2); diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 20df44b49ceb..32b4814eb2b7 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -90,87 +90,6 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu) return hcr | (guest_hcr & ~NV_HCR_GUEST_EXCLUDE); } -static void __activate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 cptr; - - /* - * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to - * CPTR_EL2. In general, CPACR_EL1 has the same layout as CPTR_EL2, - * except for some missing controls, such as TAM. - * In this case, CPTR_EL2.TAM has the same position with or without - * VHE (HCR.E2H == 1) which allows us to use here the CPTR_EL2.TAM - * shift value for trapping the AMU accesses. - */ - u64 val = CPACR_EL1_TTA | CPTR_EL2_TAM; - - if (guest_owns_fp_regs()) { - val |= CPACR_EL1_FPEN; - if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; - } else { - __activate_traps_fpsimd32(vcpu); - } - - if (!vcpu_has_nv(vcpu)) - goto write; - - /* - * The architecture is a bit crap (what a surprise): an EL2 guest - * writing to CPTR_EL2 via CPACR_EL1 can't set any of TCPAC or TTA, - * as they are RES0 in the guest's view. To work around it, trap the - * sucker using the very same bit it can't set... - */ - if (vcpu_el2_e2h_is_set(vcpu) && is_hyp_ctxt(vcpu)) - val |= CPTR_EL2_TCPAC; - - /* - * Layer the guest hypervisor's trap configuration on top of our own if - * we're in a nested context. - */ - if (is_hyp_ctxt(vcpu)) - goto write; - - cptr = vcpu_sanitised_cptr_el2(vcpu); - - /* - * Pay attention, there's some interesting detail here. - * - * The CPTR_EL2.xEN fields are 2 bits wide, although there are only two - * meaningful trap states when HCR_EL2.TGE = 0 (running a nested guest): - * - * - CPTR_EL2.xEN = x0, traps are enabled - * - CPTR_EL2.xEN = x1, traps are disabled - * - * In other words, bit[0] determines if guest accesses trap or not. In - * the interest of simplicity, clear the entire field if the guest - * hypervisor has traps enabled to dispel any illusion of something more - * complicated taking place. - */ - if (!(SYS_FIELD_GET(CPACR_EL1, FPEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_FPEN; - if (!(SYS_FIELD_GET(CPACR_EL1, ZEN, cptr) & BIT(0))) - val &= ~CPACR_EL1_ZEN; - - if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S2POE, IMP)) - val |= cptr & CPACR_EL1_E0POE; - - val |= cptr & CPTR_EL2_TCPAC; - -write: - write_sysreg(val, cpacr_el1); -} - -static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) -{ - u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN; - - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN; - - write_sysreg(val, cpacr_el1); -} - static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; From 59e6e101a6fa542a365dd5858affd18ba3e84cb8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jun 2025 14:37:15 +0100 Subject: [PATCH 454/497] KVM: arm64: Remove ad-hoc CPTR manipulation from fpsimd_sve_sync() There's no need for fpsimd_sve_sync() to write to CPTR/CPACR. All relevant traps are always disabled earlier within __kvm_vcpu_run(), when __deactivate_cptr_traps() configures CPTR/CPACR. With irrelevant details elided, the flow is: handle___kvm_vcpu_run(...) { flush_hyp_vcpu(...) { fpsimd_sve_flush(...); } __kvm_vcpu_run(...) { __activate_traps(...) { __activate_cptr_traps(...); } do { __guest_enter(...); } while (...); __deactivate_traps(....) { __deactivate_cptr_traps(...); } } sync_hyp_vcpu(...) { fpsimd_sve_sync(...); } } Remove the unnecessary write to CPTR/CPACR. An ISB is still necessary, so a comment is added to describe this requirement. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Mark Brown Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250617133718.4014181-5-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e9198e56e784..3206b2c07f82 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -69,7 +69,10 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) return; - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); + /* + * Traps have been disabled by __deactivate_cptr_traps(), but there + * hasn't necessarily been a context synchronization event yet. + */ isb(); if (vcpu_has_sve(vcpu)) From 186b58bacd74d9b7892869f7c7d20cf865a3c237 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jun 2025 14:37:16 +0100 Subject: [PATCH 455/497] KVM: arm64: Remove ad-hoc CPTR manipulation from kvm_hyp_handle_fpsimd() The hyp code FPSIMD/SVE/SME trap handling logic has some rather messy open-coded manipulation of CPTR/CPACR. This is benign for non-nested guests, but broken for nested guests, as the guest hypervisor's CPTR configuration is not taken into account. Consider the case where L0 provides FPSIMD+SVE to an L1 guest hypervisor, and the L1 guest hypervisor only provides FPSIMD to an L2 guest (with L1 configuring CPTR/CPACR to trap SVE usage from L2). If the L2 guest triggers an FPSIMD trap to the L0 hypervisor, kvm_hyp_handle_fpsimd() will see that the vCPU supports FPSIMD+SVE, and will configure CPTR/CPACR to NOT trap FPSIMD+SVE before returning to the L2 guest. Consequently the L2 guest would be able to manipulate SVE state even though the L1 hypervisor had configured CPTR/CPACR to forbid this. Clean this up, and fix the nested virt issue by always using __deactivate_cptr_traps() and __activate_cptr_traps() to manage the CPTR traps. This removes the need for the ad-hoc fixup in kvm_hyp_save_fpsimd_host(), and ensures that any guest hypervisor configuration of CPTR/CPACR is taken into account. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Mark Brown Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250617133718.4014181-6-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/include/hyp/switch.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 8a77fcccbcf6..2ad57b117385 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -616,11 +616,6 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) */ if (system_supports_sve()) { __hyp_sve_save_host(); - - /* Re-enable SVE traps if not supported for the guest vcpu. */ - if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_EL1_ZEN, 0); - } else { __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); } @@ -671,10 +666,7 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ /* First disable enough traps to allow us to update the registers */ - if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve())) - cpacr_clear_set(0, CPACR_EL1_FPEN | CPACR_EL1_ZEN); - else - cpacr_clear_set(0, CPACR_EL1_FPEN); + __deactivate_cptr_traps(vcpu); isb(); /* Write out the host state if it's in the registers */ @@ -696,6 +688,13 @@ static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED; + /* + * Re-enable traps necessary for the current state of the guest, e.g. + * those enabled by a guest hypervisor. The ERET to the guest will + * provide the necessary context synchronization. + */ + __activate_cptr_traps(vcpu); + return true; } From 3a300a33e4063fb44c7887bec3aecd2fd6966df8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jun 2025 14:37:17 +0100 Subject: [PATCH 456/497] KVM: arm64: Remove cpacr_clear_set() We no longer use cpacr_clear_set(). Remove cpacr_clear_set() and its helper functions. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Mark Brown Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250617133718.4014181-7-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_emulate.h | 62 ---------------------------- 1 file changed, 62 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index bd020fc28aa9..0720898f563e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -561,68 +561,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) vcpu_set_flag((v), e); \ } while (0) -#define __build_check_all_or_none(r, bits) \ - BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits)) - -#define __cpacr_to_cptr_clr(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((set) & CPACR_EL1_FPEN) \ - cptr |= CPTR_EL2_TFP; \ - if ((set) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((set) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((clr) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((clr) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((clr) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define __cpacr_to_cptr_set(clr, set) \ - ({ \ - u64 cptr = 0; \ - \ - if ((clr) & CPACR_EL1_FPEN) \ - cptr |= CPTR_EL2_TFP; \ - if ((clr) & CPACR_EL1_ZEN) \ - cptr |= CPTR_EL2_TZ; \ - if ((clr) & CPACR_EL1_SMEN) \ - cptr |= CPTR_EL2_TSM; \ - if ((set) & CPACR_EL1_TTA) \ - cptr |= CPTR_EL2_TTA; \ - if ((set) & CPTR_EL2_TAM) \ - cptr |= CPTR_EL2_TAM; \ - if ((set) & CPTR_EL2_TCPAC) \ - cptr |= CPTR_EL2_TCPAC; \ - \ - cptr; \ - }) - -#define cpacr_clear_set(clr, set) \ - do { \ - BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0); \ - BUILD_BUG_ON((clr) & CPACR_EL1_E0POE); \ - __build_check_all_or_none((clr), CPACR_EL1_FPEN); \ - __build_check_all_or_none((set), CPACR_EL1_FPEN); \ - __build_check_all_or_none((clr), CPACR_EL1_ZEN); \ - __build_check_all_or_none((set), CPACR_EL1_ZEN); \ - __build_check_all_or_none((clr), CPACR_EL1_SMEN); \ - __build_check_all_or_none((set), CPACR_EL1_SMEN); \ - \ - if (has_vhe() || has_hvhe()) \ - sysreg_clear_set(cpacr_el1, clr, set); \ - else \ - sysreg_clear_set(cptr_el2, \ - __cpacr_to_cptr_clr(clr, set), \ - __cpacr_to_cptr_set(clr, set));\ - } while (0) - /* * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE * format if E2H isn't set. From 04c5355b2a94ff3191ce63ab035fb7f04d036869 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jun 2025 14:37:18 +0100 Subject: [PATCH 457/497] KVM: arm64: VHE: Centralize ISBs when returning to host The VHE hyp code has recently gained a few ISBs. Simplify this to one unconditional ISB in __kvm_vcpu_run_vhe(), and remove the unnecessary ISB from the kvm_call_hyp_ret() macro. While kvm_call_hyp_ret() is also used to invoke __vgic_v3_get_gic_config(), but no ISB is necessary in that case either. For the moment, an ISB is left in kvm_call_hyp(), as there are many more users, and removing the ISB would require a more thorough audit. Suggested-by: Marc Zyngier Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Fuad Tabba Cc: Marc Zyngier Cc: Mark Brown Cc: Oliver Upton Cc: Will Deacon Link: https://lore.kernel.org/r/20250617133718.4014181-8-mark.rutland@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 3 --- arch/arm64/kvm/hyp/vhe/switch.c | 25 +++++++++++------------ 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5ccca509dff1..d27079968341 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1289,9 +1289,8 @@ void kvm_arm_resume_guest(struct kvm *kvm); }) /* - * The couple of isb() below are there to guarantee the same behaviour - * on VHE as on !VHE, where the eret to EL1 acts as a context - * synchronization event. + * The isb() below is there to guarantee the same behaviour on VHE as on !VHE, + * where the eret to EL1 acts as a context synchronization event. */ #define kvm_call_hyp(f, ...) \ do { \ @@ -1309,7 +1308,6 @@ void kvm_arm_resume_guest(struct kvm *kvm); \ if (has_vhe()) { \ ret = f(__VA_ARGS__); \ - isb(); \ } else { \ ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h index 73881e1dc267..502a5b73ee70 100644 --- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -167,9 +167,6 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) __debug_save_state(guest_dbg, guest_ctxt); __debug_restore_state(host_dbg, host_ctxt); - - if (has_vhe()) - isb(); } #endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 32b4814eb2b7..477f1580ffea 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -558,10 +558,10 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) host_ctxt = host_data_ptr(host_ctxt); guest_ctxt = &vcpu->arch.ctxt; - sysreg_save_host_state_vhe(host_ctxt); - fpsimd_lazy_switch_to_guest(vcpu); + sysreg_save_host_state_vhe(host_ctxt); + /* * Note that ARM erratum 1165522 requires us to configure both stage 1 * and stage 2 translation for the guest context before we clear @@ -586,18 +586,23 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); - /* Ensure CPTR trap deactivation has taken effect */ + sysreg_restore_host_state_vhe(host_ctxt); + + __debug_switch_to_host(vcpu); + + /* + * Ensure that all system register writes above have taken effect + * before returning to the host. In VHE mode, CPTR traps for + * FPSIMD/SVE/SME also apply to EL2, so FPSIMD/SVE/SME state must be + * manipulated after the ISB. + */ isb(); fpsimd_lazy_switch_to_host(vcpu); - sysreg_restore_host_state_vhe(host_ctxt); - if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); - __debug_switch_to_host(vcpu); - return exit_code; } NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe); @@ -627,12 +632,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) */ local_daif_restore(DAIF_PROCCTX_NOIRQ); - /* - * When we exit from the guest we change a number of CPU configuration - * parameters, such as traps. We rely on the isb() in kvm_call_hyp*() - * to make sure these changes take effect before running the host or - * additional guests. - */ return ret; } From e353b0854d3a1a31cb061df8d022fbfea53a0f24 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Mon, 16 Jun 2025 11:37:43 +0000 Subject: [PATCH 458/497] net: lan743x: fix potential out-of-bounds write in lan743x_ptp_io_event_clock_get() Before calling lan743x_ptp_io_event_clock_get(), the 'channel' value is checked against the maximum value of PCI11X1X_PTP_IO_MAX_CHANNELS(8). This seems correct and aligns with the PTP interrupt status register (PTP_INT_STS) specifications. However, lan743x_ptp_io_event_clock_get() writes to ptp->extts[] with only LAN743X_PTP_N_EXTTS(4) elements, using channel as an index: lan743x_ptp_io_event_clock_get(..., u8 channel,...) { ... /* Update Local timestamp */ extts = &ptp->extts[channel]; extts->ts.tv_sec = sec; ... } To avoid an out-of-bounds write and utilize all the supported GPIO inputs, set LAN743X_PTP_N_EXTTS to 8. Detected using the static analysis tool - Svace. Fixes: 60942c397af6 ("net: lan743x: Add support for PTP-IO Event Input External Timestamp (extts)") Signed-off-by: Alexey Kodanev Reviewed-by: Jacob Keller Acked-by: Rengarajan S Link: https://patch.msgid.link/20250616113743.36284-1-aleksei.kodanev@bell-sw.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan743x_ptp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.h b/drivers/net/ethernet/microchip/lan743x_ptp.h index e8d073bfa2ca..f33dc83c5700 100644 --- a/drivers/net/ethernet/microchip/lan743x_ptp.h +++ b/drivers/net/ethernet/microchip/lan743x_ptp.h @@ -18,9 +18,9 @@ */ #define LAN743X_PTP_N_EVENT_CHAN 2 #define LAN743X_PTP_N_PEROUT LAN743X_PTP_N_EVENT_CHAN -#define LAN743X_PTP_N_EXTTS 4 -#define LAN743X_PTP_N_PPS 0 #define PCI11X1X_PTP_IO_MAX_CHANNELS 8 +#define LAN743X_PTP_N_EXTTS PCI11X1X_PTP_IO_MAX_CHANNELS +#define LAN743X_PTP_N_PPS 0 #define PTP_CMD_CTL_TIMEOUT_CNT 50 struct lan743x_adapter; From 8ea688a3372e8369dc04395b39b4e71a6d91d4d5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 27 May 2025 20:12:47 -0400 Subject: [PATCH 459/497] nfsd: use threads array as-is in netlink interface The old nfsdfs interface for starting a server with multiple pools handles the special case of a single entry array passed down from userland by distributing the threads over every NUMA node. The netlink control interface however constructs an array of length nfsd_nrpools() and fills any unprovided slots with 0's. This behavior defeats the special casing that the old interface relies on. Change nfsd_nl_threads_set_doit() to pass down the array from userland as-is. Fixes: 7f5c330b2620 ("nfsd: allow passing in array of thread counts via netlink") Cc: stable@vger.kernel.org Reported-by: Mike Snitzer Closes: https://lore.kernel.org/linux-nfs/aDC-ftnzhJAlwqwh@kernel.org/ Signed-off-by: Jeff Layton Reviewed-by: Simon Horman Tested-by: Mike Snitzer Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3f3e9f6c4250..6a42cc7a845a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1611,7 +1611,7 @@ int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, */ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) { - int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem; + int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct nlattr *attr; @@ -1623,12 +1623,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) /* count number of SERVER_THREADS values */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) - count++; + nrpools++; } mutex_lock(&nfsd_mutex); - nrpools = max(count, nfsd_nrpools(net)); nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); if (!nthreads) { ret = -ENOMEM; From 94d10a4dba0bc482f2b01e39f06d5513d0f75742 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 19 Jun 2025 06:01:55 -0400 Subject: [PATCH 460/497] sunrpc: handle SVC_GARBAGE during svc auth processing as auth error tianshuo han reported a remotely-triggerable crash if the client sends a kernel RPC server a specially crafted packet. If decoding the RPC reply fails in such a way that SVC_GARBAGE is returned without setting the rq_accept_statp pointer, then that pointer can be dereferenced and a value stored there. If it's the first time the thread has processed an RPC, then that pointer will be set to NULL and the kernel will crash. In other cases, it could create a memory scribble. The server sunrpc code treats a SVC_GARBAGE return from svc_authenticate or pg_authenticate as if it should send a GARBAGE_ARGS reply. RFC 5531 says that if authentication fails that the RPC should be rejected instead with a status of AUTH_ERR. Handle a SVC_GARBAGE return as an AUTH_ERROR, with a reason of AUTH_BADCRED instead of returning GARBAGE_ARGS in that case. This sidesteps the whole problem of touching the rpc_accept_statp pointer in this situation and avoids the crash. Cc: stable@kernel.org Fixes: 29cd2927fb91 ("SUNRPC: Fix encoding of accepted but unsuccessful RPC replies") Reported-by: tianshuo han Reviewed-by: Chuck Lever Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index ef8a05aac87f..9c93b854e809 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1371,7 +1371,8 @@ svc_process_common(struct svc_rqst *rqstp) case SVC_OK: break; case SVC_GARBAGE: - goto err_garbage_args; + rqstp->rq_auth_stat = rpc_autherr_badcred; + goto err_bad_auth; case SVC_SYSERR: goto err_system_err; case SVC_DENIED: @@ -1512,14 +1513,6 @@ svc_process_common(struct svc_rqst *rqstp) *rqstp->rq_accept_statp = rpc_proc_unavail; goto sendit; -err_garbage_args: - svc_printk(rqstp, "failed to decode RPC header\n"); - - if (serv->sv_stats) - serv->sv_stats->rpcbadfmt++; - *rqstp->rq_accept_statp = rpc_garbage_args; - goto sendit; - err_system_err: if (serv->sv_stats) serv->sv_stats->rpcbadfmt++; From 8c8472855884355caf3d8e0c50adf825f83454b2 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 19 Jun 2025 12:10:31 +1000 Subject: [PATCH 461/497] ublk: santizize the arguments from userspace when adding a device Sanity check the values for queue depth and number of queues we get from userspace when adding a device. Signed-off-by: Ronnie Sahlberg Reviewed-by: Ming Lei Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Fixes: 62fe99cef94a ("ublk: add read()/write() support for ublk char device") Link: https://lore.kernel.org/r/20250619021031.181340-1-ronniesahlberg@gmail.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c637ea010d34..d36f44f5ee80 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2825,6 +2825,9 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header) if (copy_from_user(&info, argp, sizeof(info))) return -EFAULT; + if (info.queue_depth > UBLK_MAX_QUEUE_DEPTH || info.nr_hw_queues > UBLK_MAX_NR_QUEUES) + return -EINVAL; + if (capable(CAP_SYS_ADMIN)) info.flags &= ~UBLK_F_UNPRIVILEGED_DEV; else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV)) From 16c1241b08751a67cd7a0221ea9f82b0b02806f4 Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Thu, 12 Jun 2025 00:09:01 -0700 Subject: [PATCH 462/497] drm/xe/bmg: Update Wa_16023588340 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows for additional L2 caching modes. Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340") Cc: Matthew Auld Reviewed-by: Matthew Auld Signed-off-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20250612-wa-14022085890-v4-2-94ba5dcc1e30@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 6ab42fa03d4c88a0ddf5e56e62794853b198e7bf) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 0e5d243c9451..6c4cb9576fb6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -118,7 +118,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); } - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF); xe_force_wake_put(gt_to_fw(gt), fw_ref); } From 87a15c89d8c7b00b0fc94e0d4f554f7ee2fe6961 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 12 Jun 2025 15:14:12 -0700 Subject: [PATCH 463/497] drm/xe: Fix memset on iomem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should rather use xe_map_memset() as the BO is created with XE_BO_FLAG_VRAM_IF_DGFX in xe_guc_pc_init(). Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250612-vmap-vaddr-v1-1-26238ed443eb@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 21cf47d89fba353b2d5915ba4718040c4cb955d3) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_guc_pc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 18c623992035..3beaaa7b25c1 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -1068,7 +1068,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc) goto out; } - memset(pc->bo->vmap.vaddr, 0, size); + xe_map_memset(xe, &pc->bo->vmap, 0, 0, size); slpc_shared_data_write(pc, header.size, size); earlier = ktime_get(); From a39d082c3553d35b4fe5585e1e2fb221c130cae8 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 11 Jun 2025 14:44:54 -0700 Subject: [PATCH 464/497] drm/xe: Fix early wedge on GuC load failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the GuC fails to load we declare the device wedged. However, the very first GuC load attempt on GT0 (from xe_gt_init_hwconfig) is done before the GT1 GuC objects are initialized, so things go bad when the wedge code attempts to cleanup GT1. To fix this, check the initialization status in the functions called during wedge. Fixes: 7dbe8af13c18 ("drm/xe: Wedge the entire device") Signed-off-by: Daniele Ceraolo Spurio Cc: Rodrigo Vivi Cc: Matthew Brost Cc: Jonathan Cavitt Cc: Lucas De Marchi Cc: Zhanjun Dong Cc: stable@vger.kernel.org # v6.12+: 1e1981b16bb1: drm/xe: Fix taking invalid lock on wedge Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Jonathan Cavitt Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250611214453.1159846-2-daniele.ceraolospurio@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 0b93b7dcd9eb888a6ac7546560877705d4ad61bf) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 8 ++++++++ drivers/gpu/drm/xe/xe_guc_ct.c | 7 +++++-- drivers/gpu/drm/xe/xe_guc_ct.h | 5 +++++ drivers/gpu/drm/xe/xe_guc_submit.c | 3 +++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 084cbdeba8ea..e1362e608146 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -137,6 +137,14 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) struct xe_gt_tlb_invalidation_fence *fence, *next; int pending_seqno; + /* + * we can get here before the CTs are even initialized if we're wedging + * very early, in which case there are not going to be any pending + * fences so we can bail immediately. + */ + if (!xe_guc_ct_initialized(>->uc.guc.ct)) + return; + /* * CT channel is already disabled at this point. No new TLB requests can * appear. diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2447de0ebedf..d0ac48d8f4f7 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -514,6 +514,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { + if (!xe_guc_ct_initialized(ct)) + return; + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -760,7 +763,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -1344,7 +1347,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 82c4ae458dda..582aac106469 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -22,6 +22,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot); void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb); +static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct) +{ + return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED; +} + static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct) { return ct->state == XE_GUC_CT_STATE_ENABLED; diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6d84a52b660a..9567f6700cf2 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1762,6 +1762,9 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc) { int ret; + if (!guc->submission_state.initialized) + return 0; + /* * Using an atomic here rather than submission_state.lock as this * function can be called while holding the CT lock (engine reset From a1113cefd7d62e20735edda79507dc9f6ce103ff Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 16 Jun 2025 15:44:37 -0700 Subject: [PATCH 465/497] MAINTAINERS: Remove Shannon Nelson from MAINTAINERS file Brett Creeley is taking ownership of AMD/Pensando drivers while I wander off into the sunset with my retirement this month. I'll still keep an eye out on a few topics for awhile, and maybe do some free-lance work in the future. Meanwhile, thank you all for the fun and support and the many learning opportunities :-). Special thanks go to DaveM for merging my first patch long ago, the big ionic patchset a few years ago, and my last patchset last week. Redirect things to a non-corporate account. Signed-off-by: Shannon Nelson Reviewed-by: Simon Horman Signed-off-by: Brett Creeley Link: https://patch.msgid.link/20250616224437.56581-1-shannon.nelson@amd.com [Jakub: squash in the .mailmap update] Signed-off-by: Shannon Nelson Link: https://patch.msgid.link/20250619010603.1173141-1-sln@onemain.com Signed-off-by: Jakub Kicinski --- .mailmap | 7 ++++--- MAINTAINERS | 5 +---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.mailmap b/.mailmap index b77cd34cf852..7a3ffabb3434 100644 --- a/.mailmap +++ b/.mailmap @@ -691,9 +691,10 @@ Serge Hallyn Serge Hallyn Seth Forshee Shakeel Butt -Shannon Nelson -Shannon Nelson -Shannon Nelson +Shannon Nelson +Shannon Nelson +Shannon Nelson +Shannon Nelson Sharath Chandra Vurukala Shiraz Hashim Shuah Khan diff --git a/MAINTAINERS b/MAINTAINERS index 99fbde007792..437381aeaa71 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1157,7 +1157,6 @@ F: arch/x86/include/asm/amd/node.h F: arch/x86/kernel/amd_node.c AMD PDS CORE DRIVER -M: Shannon Nelson M: Brett Creeley L: netdev@vger.kernel.org S: Maintained @@ -9941,7 +9940,6 @@ F: drivers/fwctl/mlx5/ FWCTL PDS DRIVER M: Brett Creeley -R: Shannon Nelson L: linux-kernel@vger.kernel.org S: Maintained F: drivers/fwctl/pds/ @@ -19377,7 +19375,7 @@ F: crypto/pcrypt.c F: include/crypto/pcrypt.h PDS DSC VIRTIO DATA PATH ACCELERATOR -R: Shannon Nelson +R: Brett Creeley F: drivers/vdpa/pds/ PECI HARDWARE MONITORING DRIVERS @@ -19399,7 +19397,6 @@ F: include/linux/peci-cpu.h F: include/linux/peci.h PENSANDO ETHERNET DRIVERS -M: Shannon Nelson M: Brett Creeley L: netdev@vger.kernel.org S: Maintained From 10876da918fa1aec0227fb4c67647513447f53a9 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 17 Jun 2025 15:40:42 -0700 Subject: [PATCH 466/497] calipso: Fix null-ptr-deref in calipso_req_{set,del}attr(). syzkaller reported a null-ptr-deref in sock_omalloc() while allocating a CALIPSO option. [0] The NULL is of struct sock, which was fetched by sk_to_full_sk() in calipso_req_setattr(). Since commit a1a5344ddbe8 ("tcp: avoid two atomic ops for syncookies"), reqsk->rsk_listener could be NULL when SYN Cookie is returned to its client, as hinted by the leading SYN Cookie log. Here are 3 options to fix the bug: 1) Return 0 in calipso_req_setattr() 2) Return an error in calipso_req_setattr() 3) Alaways set rsk_listener 1) is no go as it bypasses LSM, but 2) effectively disables SYN Cookie for CALIPSO. 3) is also no go as there have been many efforts to reduce atomic ops and make TCP robust against DDoS. See also commit 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under synflood"). As of the blamed commit, SYN Cookie already did not need refcounting, and no one has stumbled on the bug for 9 years, so no CALIPSO user will care about SYN Cookie. Let's return an error in calipso_req_setattr() and calipso_req_delattr() in the SYN Cookie case. This can be reproduced by [1] on Fedora and now connect() of nc times out. [0]: TCP: request_sock_TCPv6: Possible SYN flooding on port [::]:20002. Sending cookies. Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] CPU: 3 UID: 0 PID: 12262 Comm: syz.1.2611 Not tainted 6.14.0 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 RIP: 0010:read_pnet include/net/net_namespace.h:406 [inline] RIP: 0010:sock_net include/net/sock.h:655 [inline] RIP: 0010:sock_kmalloc+0x35/0x170 net/core/sock.c:2806 Code: 89 d5 41 54 55 89 f5 53 48 89 fb e8 25 e3 c6 fd e8 f0 91 e3 00 48 8d 7b 30 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 26 01 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b RSP: 0018:ffff88811af89038 EFLAGS: 00010216 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffff888105266400 RDX: 0000000000000006 RSI: ffff88800c890000 RDI: 0000000000000030 RBP: 0000000000000050 R08: 0000000000000000 R09: ffff88810526640e R10: ffffed1020a4cc81 R11: ffff88810526640f R12: 0000000000000000 R13: 0000000000000820 R14: ffff888105266400 R15: 0000000000000050 FS: 00007f0653a07640(0000) GS:ffff88811af80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f863ba096f4 CR3: 00000000163c0005 CR4: 0000000000770ef0 PKRU: 80000000 Call Trace: ipv6_renew_options+0x279/0x950 net/ipv6/exthdrs.c:1288 calipso_req_setattr+0x181/0x340 net/ipv6/calipso.c:1204 calipso_req_setattr+0x56/0x80 net/netlabel/netlabel_calipso.c:597 netlbl_req_setattr+0x18a/0x440 net/netlabel/netlabel_kapi.c:1249 selinux_netlbl_inet_conn_request+0x1fb/0x320 security/selinux/netlabel.c:342 selinux_inet_conn_request+0x1eb/0x2c0 security/selinux/hooks.c:5551 security_inet_conn_request+0x50/0xa0 security/security.c:4945 tcp_v6_route_req+0x22c/0x550 net/ipv6/tcp_ipv6.c:825 tcp_conn_request+0xec8/0x2b70 net/ipv4/tcp_input.c:7275 tcp_v6_conn_request+0x1e3/0x440 net/ipv6/tcp_ipv6.c:1328 tcp_rcv_state_process+0xafa/0x52b0 net/ipv4/tcp_input.c:6781 tcp_v6_do_rcv+0x8a6/0x1a40 net/ipv6/tcp_ipv6.c:1667 tcp_v6_rcv+0x505e/0x5b50 net/ipv6/tcp_ipv6.c:1904 ip6_protocol_deliver_rcu+0x17c/0x1da0 net/ipv6/ip6_input.c:436 ip6_input_finish+0x103/0x180 net/ipv6/ip6_input.c:480 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip6_input+0x13c/0x6b0 net/ipv6/ip6_input.c:491 dst_input include/net/dst.h:469 [inline] ip6_rcv_finish net/ipv6/ip6_input.c:79 [inline] ip6_rcv_finish+0xb6/0x490 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ipv6_rcv+0xf9/0x490 net/ipv6/ip6_input.c:309 __netif_receive_skb_one_core+0x12e/0x1f0 net/core/dev.c:5896 __netif_receive_skb+0x1d/0x170 net/core/dev.c:6009 process_backlog+0x41e/0x13b0 net/core/dev.c:6357 __napi_poll+0xbd/0x710 net/core/dev.c:7191 napi_poll net/core/dev.c:7260 [inline] net_rx_action+0x9de/0xde0 net/core/dev.c:7382 handle_softirqs+0x19a/0x770 kernel/softirq.c:561 do_softirq.part.0+0x36/0x70 kernel/softirq.c:462 do_softirq arch/x86/include/asm/preempt.h:26 [inline] __local_bh_enable_ip+0xf1/0x110 kernel/softirq.c:389 local_bh_enable include/linux/bottom_half.h:33 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:919 [inline] __dev_queue_xmit+0xc2a/0x3c40 net/core/dev.c:4679 dev_queue_xmit include/linux/netdevice.h:3313 [inline] neigh_hh_output include/net/neighbour.h:523 [inline] neigh_output include/net/neighbour.h:537 [inline] ip6_finish_output2+0xd69/0x1f80 net/ipv6/ip6_output.c:141 __ip6_finish_output net/ipv6/ip6_output.c:215 [inline] ip6_finish_output+0x5dc/0xd60 net/ipv6/ip6_output.c:226 NF_HOOK_COND include/linux/netfilter.h:303 [inline] ip6_output+0x24b/0x8d0 net/ipv6/ip6_output.c:247 dst_output include/net/dst.h:459 [inline] NF_HOOK include/linux/netfilter.h:314 [inline] NF_HOOK include/linux/netfilter.h:308 [inline] ip6_xmit+0xbbc/0x20d0 net/ipv6/ip6_output.c:366 inet6_csk_xmit+0x39a/0x720 net/ipv6/inet6_connection_sock.c:135 __tcp_transmit_skb+0x1a7b/0x3b40 net/ipv4/tcp_output.c:1471 tcp_transmit_skb net/ipv4/tcp_output.c:1489 [inline] tcp_send_syn_data net/ipv4/tcp_output.c:4059 [inline] tcp_connect+0x1c0c/0x4510 net/ipv4/tcp_output.c:4148 tcp_v6_connect+0x156c/0x2080 net/ipv6/tcp_ipv6.c:333 __inet_stream_connect+0x3a7/0xed0 net/ipv4/af_inet.c:677 tcp_sendmsg_fastopen+0x3e2/0x710 net/ipv4/tcp.c:1039 tcp_sendmsg_locked+0x1e82/0x3570 net/ipv4/tcp.c:1091 tcp_sendmsg+0x2f/0x50 net/ipv4/tcp.c:1358 inet6_sendmsg+0xb9/0x150 net/ipv6/af_inet6.c:659 sock_sendmsg_nosec net/socket.c:718 [inline] __sock_sendmsg+0xf4/0x2a0 net/socket.c:733 __sys_sendto+0x29a/0x390 net/socket.c:2187 __do_sys_sendto net/socket.c:2194 [inline] __se_sys_sendto net/socket.c:2190 [inline] __x64_sys_sendto+0xe1/0x1c0 net/socket.c:2190 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xc3/0x1d0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f06553c47ed Code: 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f0653a06fc8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f0655605fa0 RCX: 00007f06553c47ed RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000000000b RBP: 00007f065545db38 R08: 0000200000000140 R09: 000000000000001c R10: f7384d4ea84b01bd R11: 0000000000000246 R12: 0000000000000000 R13: 00007f0655605fac R14: 00007f0655606038 R15: 00007f06539e7000 Modules linked in: [1]: dnf install -y selinux-policy-targeted policycoreutils netlabel_tools procps-ng nmap-ncat mount -t selinuxfs none /sys/fs/selinux load_policy netlabelctl calipso add pass doi:1 netlabelctl map del default netlabelctl map add default address:::1 protocol:calipso,1 sysctl net.ipv4.tcp_syncookies=2 nc -l ::1 80 & nc ::1 80 Fixes: e1adea927080 ("calipso: Allow request sockets to be relabelled by the lsm.") Reported-by: syzkaller Reported-by: John Cheung Closes: https://lore.kernel.org/netdev/CAP=Rh=MvfhrGADy+-WJiftV2_WzMH4VEhEFmeT28qY+4yxNu4w@mail.gmail.com/ Signed-off-by: Kuniyuki Iwashima Acked-by: Paul Moore Link: https://patch.msgid.link/20250617224125.17299-1-kuni1840@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/calipso.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 62618a058b8f..a247bb93908b 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1207,6 +1207,10 @@ static int calipso_req_setattr(struct request_sock *req, struct ipv6_opt_hdr *old, *new; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return -ENOMEM; + if (req_inet->ipv6_opt && req_inet->ipv6_opt->hopopt) old = req_inet->ipv6_opt->hopopt; else @@ -1247,6 +1251,10 @@ static void calipso_req_delattr(struct request_sock *req) struct ipv6_txoptions *txopts; struct sock *sk = sk_to_full_sk(req_to_sk(req)); + /* sk is NULL for SYN+ACK w/ SYN Cookie */ + if (!sk) + return; + if (!req_inet->ipv6_opt || !req_inet->ipv6_opt->hopopt) return; From fc27ab48904ceb7e4792f0c400f1ef175edf16fe Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 18 Jun 2025 09:36:50 +0200 Subject: [PATCH 467/497] NFC: nci: uart: Set tty->disc_data only in success path Setting tty->disc_data before opening the NCI device means we need to clean it up on error paths. This also opens some short window if device starts sending data, even before NCIUARTSETDRIVER IOCTL succeeded (broken hardware?). Close the window by exposing tty->disc_data only on the success path, when opening of the NCI device and try_module_get() succeeds. The code differs in error path in one aspect: tty->disc_data won't be ever assigned thus NULL-ified. This however should not be relevant difference, because of "tty->disc_data=NULL" in nci_uart_tty_open(). Cc: Linus Torvalds Fixes: 9961127d4bce ("NFC: nci: add generic uart support") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Greg Kroah-Hartman Link: https://patch.msgid.link/20250618073649.25049-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- net/nfc/nci/uart.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index ed1508a9e093..aab107727f18 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -119,22 +119,22 @@ static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver) memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart)); nu->tty = tty; - tty->disc_data = nu; skb_queue_head_init(&nu->tx_q); INIT_WORK(&nu->write_work, nci_uart_write_work); spin_lock_init(&nu->rx_lock); ret = nu->ops.open(nu); if (ret) { - tty->disc_data = NULL; kfree(nu); + return ret; } else if (!try_module_get(nu->owner)) { nu->ops.close(nu); - tty->disc_data = NULL; kfree(nu); return -ENOENT; } - return ret; + tty->disc_data = nu; + + return 0; } /* ------ LDISC part ------ */ From 78bd03ee1f20a267d2c218884b66041b3508ac9c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 18 Jun 2025 09:37:40 +0200 Subject: [PATCH 468/497] net: airoha: Always check return value from airoha_ppe_foe_get_entry() airoha_ppe_foe_get_entry routine can return NULL, so check the returned pointer is not NULL in airoha_ppe_foe_flow_l2_entry_update() Fixes: b81e0f2b58be3 ("net: airoha: Add FLOW_CLS_STATS callback support") Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250618-check-ret-from-airoha_ppe_foe_get_entry-v2-1-068dcea3cc66@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_ppe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 9067d2fc7706..0e217acfc5ef 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -809,8 +809,10 @@ airoha_ppe_foe_flow_l2_entry_update(struct airoha_ppe *ppe, int idle; hwe = airoha_ppe_foe_get_entry(ppe, iter->hash); - ib1 = READ_ONCE(hwe->ib1); + if (!hwe) + continue; + ib1 = READ_ONCE(hwe->ib1); state = FIELD_GET(AIROHA_FOE_IB1_BIND_STATE, ib1); if (state != AIROHA_FOE_STATE_BIND) { iter->hash = 0xffff; From e7ea5f5b1858ddb96b152584d5fe06e6fc623e89 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Wed, 18 Jun 2025 13:59:02 +0000 Subject: [PATCH 469/497] mlxbf_gige: return EPROBE_DEFER if PHY IRQ is not available The message "Error getting PHY irq. Use polling instead" is emitted when the mlxbf_gige driver is loaded by the kernel before the associated gpio-mlxbf driver, and thus the call to get the PHY IRQ fails since it is not yet available. The driver probe() must return -EPROBE_DEFER if acpi_dev_gpio_irq_get_by() returns the same. Fixes: 6c2a6ddca763 ("net: mellanox: mlxbf_gige: Replace non-standard interrupt handling") Signed-off-by: David Thompson Reviewed-by: Asmaa Mnebhi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250618135902.346-1-davthompson@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index fb2e5b844c15..d76d7a945899 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -447,8 +447,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX); phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy", 0); - if (phy_irq < 0) { - dev_err(&pdev->dev, "Error getting PHY irq. Use polling instead"); + if (phy_irq == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto out; + } else if (phy_irq < 0) { phy_irq = PHY_POLL; } From d13a3824bfd2b4774b671a75cf766a16637a0e67 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jun 2025 14:08:43 +0000 Subject: [PATCH 470/497] net: atm: add lec_mutex syzbot found its way in net/atm/lec.c, and found an error path in lecd_attach() could leave a dangling pointer in dev_lec[]. Add a mutex to protect dev_lecp[] uses from lecd_attach(), lec_vcc_attach() and lec_mcast_attach(). Following patch will use this mutex for /proc/net/atm/lec. BUG: KASAN: slab-use-after-free in lecd_attach net/atm/lec.c:751 [inline] BUG: KASAN: slab-use-after-free in lane_ioctl+0x2224/0x23e0 net/atm/lec.c:1008 Read of size 8 at addr ffff88807c7b8e68 by task syz.1.17/6142 CPU: 1 UID: 0 PID: 6142 Comm: syz.1.17 Not tainted 6.16.0-rc1-syzkaller-00239-g08215f5486ec #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xcd/0x680 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 lecd_attach net/atm/lec.c:751 [inline] lane_ioctl+0x2224/0x23e0 net/atm/lec.c:1008 do_vcc_ioctl+0x12c/0x930 net/atm/ioctl.c:159 sock_do_ioctl+0x118/0x280 net/socket.c:1190 sock_ioctl+0x227/0x6b0 net/socket.c:1311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl fs/ioctl.c:893 [inline] __x64_sys_ioctl+0x18e/0x210 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x4c0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Allocated by task 6132: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0xaa/0xb0 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4328 [inline] __kvmalloc_node_noprof+0x27b/0x620 mm/slub.c:5015 alloc_netdev_mqs+0xd2/0x1570 net/core/dev.c:11711 lecd_attach net/atm/lec.c:737 [inline] lane_ioctl+0x17db/0x23e0 net/atm/lec.c:1008 do_vcc_ioctl+0x12c/0x930 net/atm/ioctl.c:159 sock_do_ioctl+0x118/0x280 net/socket.c:1190 sock_ioctl+0x227/0x6b0 net/socket.c:1311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl fs/ioctl.c:893 [inline] __x64_sys_ioctl+0x18e/0x210 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x4c0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 6132: kasan_save_stack+0x33/0x60 mm/kasan/common.c:47 kasan_save_track+0x14/0x30 mm/kasan/common.c:68 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:247 [inline] __kasan_slab_free+0x51/0x70 mm/kasan/common.c:264 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2381 [inline] slab_free mm/slub.c:4643 [inline] kfree+0x2b4/0x4d0 mm/slub.c:4842 free_netdev+0x6c5/0x910 net/core/dev.c:11892 lecd_attach net/atm/lec.c:744 [inline] lane_ioctl+0x1ce8/0x23e0 net/atm/lec.c:1008 do_vcc_ioctl+0x12c/0x930 net/atm/ioctl.c:159 sock_do_ioctl+0x118/0x280 net/socket.c:1190 sock_ioctl+0x227/0x6b0 net/socket.c:1311 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl fs/ioctl.c:893 [inline] __x64_sys_ioctl+0x18e/0x210 fs/ioctl.c:893 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+8b64dec3affaed7b3af5@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/6852c6f6.050a0220.216029.0018.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20250618140844.1686882-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/atm/lec.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/atm/lec.c b/net/atm/lec.c index acef984f3367..1e1f3eb0e2ba 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -124,6 +124,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; +static DEFINE_MUTEX(lec_mutex); #if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) @@ -685,6 +686,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) int bytes_left; struct atmlec_ioc ioc_data; + lockdep_assert_held(&lec_mutex); /* Lecd must be up in this case */ bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); if (bytes_left != 0) @@ -710,6 +712,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { + lockdep_assert_held(&lec_mutex); if (arg < 0 || arg >= MAX_LEC_ITF) return -EINVAL; arg = array_index_nospec(arg, MAX_LEC_ITF); @@ -725,6 +728,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) int i; struct lec_priv *priv; + lockdep_assert_held(&lec_mutex); if (arg < 0) arg = 0; if (arg >= MAX_LEC_ITF) @@ -742,6 +746,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); + dev_lec[i] = NULL; return -EINVAL; } @@ -1003,6 +1008,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } + mutex_lock(&lec_mutex); switch (cmd) { case ATMLEC_CTRL: err = lecd_attach(vcc, (int)arg); @@ -1017,6 +1023,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + mutex_unlock(&lec_mutex); return err; } From d03b79f459c7935cff830d98373474f440bd03ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jun 2025 14:08:44 +0000 Subject: [PATCH 471/497] net: atm: fix /proc/net/atm/lec handling /proc/net/atm/lec must ensure safety against dev_lec[] changes. It appears it had dev_put() calls without prior dev_hold(), leading to imbalance and UAF. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Acked-by: Francois Romieu # Minor atm contributor Link: https://patch.msgid.link/20250618140844.1686882-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/atm/lec.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index 1e1f3eb0e2ba..afb8d3eb2185 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -909,7 +909,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l) v = (dev && netdev_priv(dev)) ? lec_priv_walk(state, l, netdev_priv(dev)) : NULL; if (!v && dev) { - dev_put(dev); /* Partial state reset for the next time we get called */ dev = NULL; } @@ -933,6 +932,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos) { struct lec_state *state = seq->private; + mutex_lock(&lec_mutex); state->itf = 0; state->dev = NULL; state->locked = NULL; @@ -950,8 +950,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v) if (state->dev) { spin_unlock_irqrestore(&state->locked->lec_arp_lock, state->flags); - dev_put(state->dev); + state->dev = NULL; } + mutex_unlock(&lec_mutex); } static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) From 9738280aae592b579a25b5b1b6584c894827d3c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 18 Jun 2025 10:17:46 -0700 Subject: [PATCH 472/497] tools: ynl: fix mixing ops and notifications on one socket The multi message support loosened the connection between the request and response handling, as we can now submit multiple requests before we start processing responses. Passing the attr set to NlMsgs decoding no longer makes sense (if it ever did), attr set may differ message by messsage. Isolate the part of decoding responsible for attr-set specific interpretation and call it once we identified the correct op. Without this fix performing SET operation on an ethtool socket, while being subscribed to notifications causes: # File "tools/net/ynl/pyynl/lib/ynl.py", line 1096, in _op # Exception| return self._ops(ops)[0] # Exception| ~~~~~~~~~^^^^^ # File "tools/net/ynl/pyynl/lib/ynl.py", line 1040, in _ops # Exception| nms = NlMsgs(reply, attr_space=op.attr_set) # Exception| ^^^^^^^^^^^ The value of op we use on line 1040 is stale, it comes form the previous loop. If a notification comes before a response we will update op to None and the next iteration thru the loop will break with the trace above. Fixes: 6fda63c45fe8 ("tools/net/ynl: fix cli.py --subscribe feature") Fixes: ba8be00f68f5 ("tools/net/ynl: Add multi message support to ynl") Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250618171746.1201403-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/lib/ynl.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 55b59f6c79b8..61deb5923067 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -231,14 +231,7 @@ class NlMsg: self.extack['unknown'].append(extack) if attr_space: - # We don't have the ability to parse nests yet, so only do global - if 'miss-type' in self.extack and 'miss-nest' not in self.extack: - miss_type = self.extack['miss-type'] - if miss_type in attr_space.attrs_by_val: - spec = attr_space.attrs_by_val[miss_type] - self.extack['miss-type'] = spec['name'] - if 'doc' in spec: - self.extack['miss-type-doc'] = spec['doc'] + self.annotate_extack(attr_space) def _decode_policy(self, raw): policy = {} @@ -264,6 +257,18 @@ class NlMsg: policy['mask'] = attr.as_scalar('u64') return policy + def annotate_extack(self, attr_space): + """ Make extack more human friendly with attribute information """ + + # We don't have the ability to parse nests yet, so only do global + if 'miss-type' in self.extack and 'miss-nest' not in self.extack: + miss_type = self.extack['miss-type'] + if miss_type in attr_space.attrs_by_val: + spec = attr_space.attrs_by_val[miss_type] + self.extack['miss-type'] = spec['name'] + if 'doc' in spec: + self.extack['miss-type-doc'] = spec['doc'] + def cmd(self): return self.nl_type @@ -277,12 +282,12 @@ class NlMsg: class NlMsgs: - def __init__(self, data, attr_space=None): + def __init__(self, data): self.msgs = [] offset = 0 while offset < len(data): - msg = NlMsg(data, offset, attr_space=attr_space) + msg = NlMsg(data, offset) offset += msg.nl_len self.msgs.append(msg) @@ -1034,12 +1039,13 @@ class YnlFamily(SpecFamily): op_rsp = [] while not done: reply = self.sock.recv(self._recv_size) - nms = NlMsgs(reply, attr_space=op.attr_set) + nms = NlMsgs(reply) self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.nl_seq in reqs_by_seq: (op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq] if nl_msg.extack: + nl_msg.annotate_extack(op.attr_set) self._decode_extack(req_msg, op, nl_msg.extack, vals) else: op = None From edf8afeecfbb0b8c1a2edb8c8892d2f759d35321 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 19 Jun 2025 09:07:24 +0200 Subject: [PATCH 473/497] net: airoha: Compute number of descriptors according to reserved memory size In order to not exceed the reserved memory size for hwfd buffers, compute the number of hwfd buffers/descriptors according to the reserved memory size and the size of each hwfd buffer (2KB). Fixes: 3a1ce9e3d01b ("net: airoha: Add the capability to allocate hwfd buffers via reserved-memory") Reviewed-by: Simon Horman Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250619-airoha-hw-num-desc-v4-1-49600a9b319a@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index a7ec609d64de..1b7fd7ee0cbf 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1065,19 +1065,13 @@ static void airoha_qdma_cleanup_tx_queue(struct airoha_queue *q) static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) { + int size, index, num_desc = HW_DSCP_NUM; struct airoha_eth *eth = qdma->eth; int id = qdma - ð->qdma[0]; dma_addr_t dma_addr; const char *name; - int size, index; u32 status; - size = HW_DSCP_NUM * sizeof(struct airoha_qdma_fwd_desc); - if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) - return -ENOMEM; - - airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); - name = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d-buf", id); if (!name) return -ENOMEM; @@ -1099,8 +1093,12 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) rmem = of_reserved_mem_lookup(np); of_node_put(np); dma_addr = rmem->base; + /* Compute the number of hw descriptors according to the + * reserved memory size and the payload buffer size + */ + num_desc = rmem->size / AIROHA_MAX_PACKET_SIZE; } else { - size = AIROHA_MAX_PACKET_SIZE * HW_DSCP_NUM; + size = AIROHA_MAX_PACKET_SIZE * num_desc; if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) return -ENOMEM; @@ -1108,6 +1106,11 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) airoha_qdma_wr(qdma, REG_FWD_BUF_BASE, dma_addr); + size = num_desc * sizeof(struct airoha_qdma_fwd_desc); + if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) + return -ENOMEM; + + airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); airoha_qdma_rmw(qdma, REG_HW_FWD_DSCP_CFG, HW_FWD_DSCP_PAYLOAD_SIZE_MASK, FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, 0)); @@ -1116,7 +1119,7 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) airoha_qdma_rmw(qdma, REG_LMGR_INIT_CFG, LMGR_INIT_START | LMGR_SRAM_MODE_MASK | HW_FWD_DESC_NUM_MASK, - FIELD_PREP(HW_FWD_DESC_NUM_MASK, HW_DSCP_NUM) | + FIELD_PREP(HW_FWD_DESC_NUM_MASK, num_desc) | LMGR_INIT_START | LMGR_SRAM_MODE_MASK); return read_poll_timeout(airoha_qdma_rr, status, From 7b46bdaec00a675f6fac9d0b01a2105b5746ebe9 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 19 Jun 2025 09:07:25 +0200 Subject: [PATCH 474/497] net: airoha: Differentiate hwfd buffer size for QDMA0 and QDMA1 EN7581 SoC allows configuring the size and the number of buffers in hwfd payload queue for both QDMA0 and QDMA1. In order to reduce the required DRAM used for hwfd buffers queues and decrease the memory footprint, differentiate hwfd buffer size for QDMA0 and QDMA1 and reduce hwfd buffer size to 1KB for QDMA1 (WAN) while maintaining 2KB for QDMA0 (LAN). Signed-off-by: Lorenzo Bianconi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250619-airoha-hw-num-desc-v4-2-49600a9b319a@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_eth.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index 1b7fd7ee0cbf..06dea3a13e77 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -1068,14 +1068,15 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) int size, index, num_desc = HW_DSCP_NUM; struct airoha_eth *eth = qdma->eth; int id = qdma - ð->qdma[0]; + u32 status, buf_size; dma_addr_t dma_addr; const char *name; - u32 status; name = devm_kasprintf(eth->dev, GFP_KERNEL, "qdma%d-buf", id); if (!name) return -ENOMEM; + buf_size = id ? AIROHA_MAX_PACKET_SIZE / 2 : AIROHA_MAX_PACKET_SIZE; index = of_property_match_string(eth->dev->of_node, "memory-region-names", name); if (index >= 0) { @@ -1096,9 +1097,9 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) /* Compute the number of hw descriptors according to the * reserved memory size and the payload buffer size */ - num_desc = rmem->size / AIROHA_MAX_PACKET_SIZE; + num_desc = div_u64(rmem->size, buf_size); } else { - size = AIROHA_MAX_PACKET_SIZE * num_desc; + size = buf_size * num_desc; if (!dmam_alloc_coherent(eth->dev, size, &dma_addr, GFP_KERNEL)) return -ENOMEM; @@ -1111,9 +1112,10 @@ static int airoha_qdma_init_hfwd_queues(struct airoha_qdma *qdma) return -ENOMEM; airoha_qdma_wr(qdma, REG_FWD_DSCP_BASE, dma_addr); + /* QDMA0: 2KB. QDMA1: 1KB */ airoha_qdma_rmw(qdma, REG_HW_FWD_DSCP_CFG, HW_FWD_DSCP_PAYLOAD_SIZE_MASK, - FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, 0)); + FIELD_PREP(HW_FWD_DSCP_PAYLOAD_SIZE_MASK, !!id)); airoha_qdma_rmw(qdma, REG_FWD_DSCP_LOW_THR, FWD_DSCP_LOW_THR_MASK, FIELD_PREP(FWD_DSCP_LOW_THR_MASK, 128)); airoha_qdma_rmw(qdma, REG_LMGR_INIT_CFG, From 6463cbe08b0cbf9bba8763306764f5fd643023e1 Mon Sep 17 00:00:00 2001 From: Pablo Martin-Gomez Date: Wed, 18 Jun 2025 13:35:16 +0200 Subject: [PATCH 475/497] mtd: spinand: fix memory leak of ECC engine conf Memory allocated for the ECC engine conf is not released during spinand cleanup. Below kmemleak trace is seen for this memory leak: unreferenced object 0xffffff80064f00e0 (size 8): comm "swapper/0", pid 1, jiffies 4294937458 hex dump (first 8 bytes): 00 00 00 00 00 00 00 00 ........ backtrace (crc 0): kmemleak_alloc+0x30/0x40 __kmalloc_cache_noprof+0x208/0x3c0 spinand_ondie_ecc_init_ctx+0x114/0x200 nand_ecc_init_ctx+0x70/0xa8 nanddev_ecc_engine_init+0xec/0x27c spinand_probe+0xa2c/0x1620 spi_mem_probe+0x130/0x21c spi_probe+0xf0/0x170 really_probe+0x17c/0x6e8 __driver_probe_device+0x17c/0x21c driver_probe_device+0x58/0x180 __device_attach_driver+0x15c/0x1f8 bus_for_each_drv+0xec/0x150 __device_attach+0x188/0x24c device_initial_probe+0x10/0x20 bus_probe_device+0x11c/0x160 Fix the leak by calling nanddev_ecc_engine_cleanup() inside spinand_cleanup(). Signed-off-by: Pablo Martin-Gomez Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 7099db7a62be..c411fe9be3ef 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1585,6 +1585,7 @@ static void spinand_cleanup(struct spinand_device *spinand) { struct nand_device *nand = spinand_to_nand(spinand); + nanddev_ecc_engine_cleanup(nand); nanddev_cleanup(nand); spinand_manufacturer_cleanup(spinand); kfree(spinand->databuf); From 51a4598ad5d9eb6be4ec9ba65bbfdf0ac302eb2e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Jun 2025 07:41:21 -0600 Subject: [PATCH 476/497] io_uring/net: always use current transfer count for buffer put A previous fix corrected the retry condition for when to continue a current bundle, but it missed that the current (not the total) transfer count also applies to the buffer put. If not, then for incrementally consumed buffer rings repeated completions on the same request may end up over consuming. Reported-by: Roy Tang (ErgoniaTrading) Cc: stable@vger.kernel.org Fixes: 3a08988123c8 ("io_uring/net: only retry recv bundle for a full transfer") Link: https://github.com/axboe/liburing/issues/1423 Signed-off-by: Jens Axboe --- io_uring/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/net.c b/io_uring/net.c index e16633fd6630..9550d4c8f866 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -821,7 +821,7 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret, if (sr->flags & IORING_RECVSEND_BUNDLE) { size_t this_ret = *ret - sr->done_io; - cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, this_ret), + cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret), issue_flags); if (sr->retry) cflags = req->cqe.flags | (cflags & CQE_F_MASK); From 417b8af2e30d7f131682a893ad79c506fd39c624 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 20 Jun 2025 23:31:08 +0800 Subject: [PATCH 477/497] erofs: remove a superfluous check for encoded extents It is possible when an inode is split into segments for multi-threaded compression, and the tail extent of a segment could also be small. Fixes: 1d191b4ca51d ("erofs: implement encoded extent metadata") Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20250620153108.1368029-1-hsiangkao@linux.alibaba.com --- fs/erofs/zmap.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 6afcb054780d..0bebc6e3a4d7 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -639,12 +639,6 @@ static int z_erofs_map_blocks_ext(struct inode *inode, } } map->m_llen = lend - map->m_la; - if (!last && map->m_llen < sb->s_blocksize) { - erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu", - map->m_llen, map->m_la, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } return 0; } From b5aafcb4efd2bdacbc37753cf807d69faa6a7304 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Tue, 10 Jun 2025 10:14:19 +0800 Subject: [PATCH 478/497] KVM: TDX: Add new TDVMCALL status code for unsupported subfuncs Add the new TDVMCALL status code TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED and return it for unimplemented TDVMCALL subfunctions. Returning TDVMCALL_STATUS_INVALID_OPERAND when a subfunction is not implemented is vague because TDX guests can't tell the error is due to the subfunction is not supported or an invalid input of the subfunction. New GHCI spec adds TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED to avoid the ambiguity. Use it instead of TDVMCALL_STATUS_INVALID_OPERAND. Before the change, for common guest implementations, when a TDX guest receives TDVMCALL_STATUS_INVALID_OPERAND, it has two cases: 1. Some operand is invalid. It could change the operand to another value retry. 2. The subfunction is not supported. For case 1, an invalid operand usually means the guest implementation bug. Since the TDX guest can't tell which case is, the best practice for handling TDVMCALL_STATUS_INVALID_OPERAND is stopping calling such leaf, treating the failure as fatal if the TDVMCALL is essential or ignoring it if the TDVMCALL is optional. With this change, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED could be sent to old TDX guest that do not know about it, but it is expected that the guest will make the same action as TDVMCALL_STATUS_INVALID_OPERAND. Currently, no known TDX guest checks TDVMCALL_STATUS_INVALID_OPERAND specifically; for example Linux just checks for success. Signed-off-by: Binbin Wu [Return it for untrapped KVM_HC_MAP_GPA_RANGE. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/shared/tdx.h | 1 + arch/x86/kvm/vmx/tdx.c | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 2f3820342598..d8525e6ef50a 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -80,6 +80,7 @@ #define TDVMCALL_STATUS_RETRY 0x0000000000000001ULL #define TDVMCALL_STATUS_INVALID_OPERAND 0x8000000000000000ULL #define TDVMCALL_STATUS_ALIGN_ERROR 0x8000000000000002ULL +#define TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED 0x8000000000000003ULL /* * Bitmasks of exposed registers (with VMM). diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b952bc673271..5d100c240ab3 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1212,11 +1212,13 @@ static int tdx_map_gpa(struct kvm_vcpu *vcpu) /* * Converting TDVMCALL_MAP_GPA to KVM_HC_MAP_GPA_RANGE requires * userspace to enable KVM_CAP_EXIT_HYPERCALL with KVM_HC_MAP_GPA_RANGE - * bit set. If not, the error code is not defined in GHCI for TDX, use - * TDVMCALL_STATUS_INVALID_OPERAND for this case. + * bit set. This is a base call so it should always be supported, but + * KVM has no way to ensure that userspace implements the GHCI correctly. + * So if KVM_HC_MAP_GPA_RANGE does not cause a VMEXIT, return an error + * to the guest. */ if (!user_exit_on_hypercall(vcpu->kvm, KVM_HC_MAP_GPA_RANGE)) { - ret = TDVMCALL_STATUS_INVALID_OPERAND; + ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; goto error; } @@ -1476,7 +1478,7 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) break; } - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED); return 1; } From cf207eac06f661fb692f405d5ab8230df884ee52 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Tue, 10 Jun 2025 10:14:20 +0800 Subject: [PATCH 479/497] KVM: TDX: Handle TDG.VP.VMCALL Handle TDVMCALL for GetQuote to generate a TD-Quote. GetQuote is a doorbell-like interface used by TDX guests to request VMM to generate a TD-Quote signed by a service hosting TD-Quoting Enclave operating on the host. A TDX guest passes a TD Report (TDREPORT_STRUCT) in a shared-memory area as parameter. Host VMM can access it and queue the operation for a service hosting TD-Quoting enclave. When completed, the Quote is returned via the same shared-memory area. KVM only checks the GPA from the TDX guest has the shared-bit set and drops the shared-bit before exiting to userspace to avoid bleeding the shared-bit into KVM's exit ABI. KVM forwards the request to userspace VMM (e.g. QEMU) and userspace VMM queues the operation asynchronously. KVM sets the return code according to the 'ret' field set by userspace to notify the TDX guest whether the request has been queued successfully or not. When the request has been queued successfully, the TDX guest can poll the status field in the shared-memory area to check whether the Quote generation is completed or not. When completed, the generated Quote is returned via the same buffer. Add KVM_EXIT_TDX as a new exit reason to userspace. Userspace is required to handle the KVM exit reason as the initial support for TDX, by reentering KVM to ensure that the TDVMCALL is complete. While at it, add a note that KVM_EXIT_HYPERCALL also requires reentry with KVM_RUN. Signed-off-by: Binbin Wu Tested-by: Mikko Ylinen Acked-by: Kai Huang [Adjust userspace API. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 49 +++++++++++++++++++++++++++++++++- arch/x86/kvm/vmx/tdx.c | 32 ++++++++++++++++++++++ include/uapi/linux/kvm.h | 17 ++++++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 1bd2d42e6424..115ec3c2b641 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6645,7 +6645,8 @@ to the byte array. .. note:: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN, - KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding + KVM_EXIT_EPR, KVM_EXIT_HYPERCALL, KVM_EXIT_TDX, + KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. @@ -7174,6 +7175,52 @@ The valid value for 'flags' is: - KVM_NOTIFY_CONTEXT_INVALID -- the VM context is corrupted and not valid in VMCS. It would run into unknown result if resume the target VM. +:: + + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + u64 ret; + u64 data[5]; + } unknown; + struct { + u64 ret; + u64 gpa; + u64 size; + } get_quote; + }; + } tdx; + +Process a TDVMCALL from the guest. KVM forwards select TDVMCALL based +on the Guest-Hypervisor Communication Interface (GHCI) specification; +KVM bridges these requests to the userspace VMM with minimal changes, +placing the inputs in the union and copying them back to the guest +on re-entry. + +Flags are currently always zero, whereas ``nr`` contains the TDVMCALL +number from register R11. The remaining field of the union provide the +inputs and outputs of the TDVMCALL. Currently the following values of +``nr`` are defined: + +* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote +signed by a service hosting TD-Quoting Enclave operating on the host. +Parameters and return value are in the ``get_quote`` field of the union. +The ``gpa`` field and ``size`` specify the guest physical address +(without the shared bit set) and the size of a shared-memory buffer, in +which the TDX guest passes a TD Report. The ``ret`` field represents +the return value of the GetQuote request. When the request has been +queued successfully, the TDX guest can poll the status field in the +shared-memory area to check whether the Quote generation is completed or +not. When completed, the generated Quote is returned via the same buffer. + +KVM may add support for more values in the future that may cause a userspace +exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case, +it will enter with output fields already valid; in the common case, the +``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``. +Userspace need not do anything if it does not wish to support a TDVMCALL. :: /* Fix the size of the union. */ diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index 5d100c240ab3..b619a3478983 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1465,6 +1465,36 @@ static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu) return 1; } +static int tdx_complete_simple(struct kvm_vcpu *vcpu) +{ + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.unknown.ret); + return 1; +} + +static int tdx_get_quote(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + u64 gpa = tdx->vp_enter_args.r12; + u64 size = tdx->vp_enter_args.r13; + + /* The gpa of buffer must have shared bit set. */ + if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) { + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_QUOTE; + vcpu->run->tdx.get_quote.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED; + vcpu->run->tdx.get_quote.gpa = gpa & ~gfn_to_gpa(kvm_gfn_direct_bits(tdx->vcpu.kvm)); + vcpu->run->tdx.get_quote.size = size; + + vcpu->arch.complete_userspace_io = tdx_complete_simple; + + return 0; +} + static int handle_tdvmcall(struct kvm_vcpu *vcpu) { switch (tdvmcall_leaf(vcpu)) { @@ -1474,6 +1504,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu) return tdx_report_fatal_error(vcpu); case TDVMCALL_GET_TD_VM_CALL_INFO: return tdx_get_td_vm_call_info(vcpu); + case TDVMCALL_GET_QUOTE: + return tdx_get_quote(vcpu); default: break; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d00b85cb168c..e23e7286ad1a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,22 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; From 25e8b1dd4883e6c251c3db5b347f3c8ae4ade921 Mon Sep 17 00:00:00 2001 From: Binbin Wu Date: Tue, 10 Jun 2025 10:14:21 +0800 Subject: [PATCH 480/497] KVM: TDX: Exit to userspace for GetTdVmCallInfo Exit to userspace for TDG.VP.VMCALL via KVM_EXIT_TDX, to allow userspace to provide information about the support of TDVMCALLs when r12 is 1 for the TDVMCALLs beyond the GHCI base API. GHCI spec defines the GHCI base TDVMCALLs: , , , , <#VE.RequestMMIO>, , , and . They must be supported by VMM to support TDX guests. For GetTdVmCallInfo - When leaf (r12) to enumerate TDVMCALL functionality is set to 0, successful execution indicates all GHCI base TDVMCALLs listed above are supported. Update the KVM TDX document with the set of the GHCI base APIs. - When leaf (r12) to enumerate TDVMCALL functionality is set to 1, it indicates the TDX guest is querying the supported TDVMCALLs beyond the GHCI base TDVMCALLs. Exit to userspace to let userspace set the TDVMCALL sub-function bit(s) accordingly to the leaf outputs. KVM could set the TDVMCALL bit(s) supported by itself when the TDVMCALLs don't need support from userspace after returning from userspace and before entering guest. Currently, no such TDVMCALLs implemented, KVM just sets the values returned from userspace. Suggested-by: Paolo Bonzini Signed-off-by: Binbin Wu [Adjust userspace API. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 10 ++++++++ arch/x86/kvm/vmx/tdx.c | 43 ++++++++++++++++++++++++++++++---- include/uapi/linux/kvm.h | 5 ++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 115ec3c2b641..9abf93ee5f65 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7191,6 +7191,11 @@ The valid value for 'flags' is: u64 gpa; u64 size; } get_quote; + struct { + u64 ret; + u64 leaf; + u64 r11, r12, r13, r14; + } get_tdvmcall_info; }; } tdx; @@ -7216,6 +7221,11 @@ queued successfully, the TDX guest can poll the status field in the shared-memory area to check whether the Quote generation is completed or not. When completed, the generated Quote is returned via the same buffer. +* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support +status of TDVMCALLs. The output values for the given leaf should be +placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info`` +field of the union. + KVM may add support for more values in the future that may cause a userspace exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case, it will enter with output fields already valid; in the common case, the diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index b619a3478983..1ad20c273f3b 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -1451,18 +1451,53 @@ static int tdx_emulate_mmio(struct kvm_vcpu *vcpu) return 1; } +static int tdx_complete_get_td_vm_call_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_tdx *tdx = to_tdx(vcpu); + + tdvmcall_set_return_code(vcpu, vcpu->run->tdx.get_tdvmcall_info.ret); + + /* + * For now, there is no TDVMCALL beyond GHCI base API supported by KVM + * directly without the support from userspace, just set the value + * returned from userspace. + */ + tdx->vp_enter_args.r11 = vcpu->run->tdx.get_tdvmcall_info.r11; + tdx->vp_enter_args.r12 = vcpu->run->tdx.get_tdvmcall_info.r12; + tdx->vp_enter_args.r13 = vcpu->run->tdx.get_tdvmcall_info.r13; + tdx->vp_enter_args.r14 = vcpu->run->tdx.get_tdvmcall_info.r14; + + return 1; +} + static int tdx_get_td_vm_call_info(struct kvm_vcpu *vcpu) { struct vcpu_tdx *tdx = to_tdx(vcpu); - if (tdx->vp_enter_args.r12) - tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); - else { + switch (tdx->vp_enter_args.r12) { + case 0: tdx->vp_enter_args.r11 = 0; + tdx->vp_enter_args.r12 = 0; tdx->vp_enter_args.r13 = 0; tdx->vp_enter_args.r14 = 0; + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_SUCCESS); + return 1; + case 1: + vcpu->run->tdx.get_tdvmcall_info.leaf = tdx->vp_enter_args.r12; + vcpu->run->exit_reason = KVM_EXIT_TDX; + vcpu->run->tdx.flags = 0; + vcpu->run->tdx.nr = TDVMCALL_GET_TD_VM_CALL_INFO; + vcpu->run->tdx.get_tdvmcall_info.ret = TDVMCALL_STATUS_SUCCESS; + vcpu->run->tdx.get_tdvmcall_info.r11 = 0; + vcpu->run->tdx.get_tdvmcall_info.r12 = 0; + vcpu->run->tdx.get_tdvmcall_info.r13 = 0; + vcpu->run->tdx.get_tdvmcall_info.r14 = 0; + vcpu->arch.complete_userspace_io = tdx_complete_get_td_vm_call_info; + return 0; + default: + tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND); + return 1; } - return 1; } static int tdx_complete_simple(struct kvm_vcpu *vcpu) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e23e7286ad1a..37891580d05d 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -462,6 +462,11 @@ struct kvm_run { __u64 gpa; __u64 size; } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; }; } tdx; /* Fix the size of the union. */ From 33b6a1f155d627f5bd80c7485c598ce45428f74f Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Tue, 10 Jun 2025 19:34:48 +0200 Subject: [PATCH 481/497] rcu: Return early if callback is not specified Currently the call_rcu() API does not check whether a callback pointer is NULL. If NULL is passed, rcu_core() will try to invoke it, resulting in NULL pointer dereference and a kernel crash. To prevent this and improve debuggability, this patch adds a check for NULL and emits a kernel stack trace to help identify a faulty caller. Signed-off-by: Uladzislau Rezki (Sony) Reviewed-by: Joel Fernandes Signed-off-by: Joel Fernandes --- kernel/rcu/tree.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e8a4b720d7d2..14d4499c6fc3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3072,6 +3072,10 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) /* Misaligned rcu_head! */ WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); + /* Avoid NULL dereference if callback is NULL. */ + if (WARN_ON_ONCE(!func)) + return; + if (debug_rcu_head_queue(head)) { /* * Probable double call_rcu(), so leak the callback. From 3085ef9d9e7ab5ae4cddbe809e2e3b8dc11cdc75 Mon Sep 17 00:00:00 2001 From: Shiji Yang Date: Wed, 18 Jun 2025 23:07:43 +0800 Subject: [PATCH 482/497] irqchip/ath79-misc: Fix missing prototypes warnings ath79_misc_irq_init() was defined but unused since commit 51fa4f8912c0 ("MIPS: ath79: drop legacy IRQ code"), so it's time to drop it. The build also warns about a missing prototype of get_c0_perfcount_int(). Remove the stale leftover function and add the missing include. Signed-off-by: Shiji Yang Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/OSBPR01MB167032D2017645200787AAEBBC72A@OSBPR01MB1670.jpnprd01.prod.outlook.com --- drivers/irqchip/irq-ath79-misc.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c index 268cc18b781f..258b8e9a2d57 100644 --- a/drivers/irqchip/irq-ath79-misc.c +++ b/drivers/irqchip/irq-ath79-misc.c @@ -15,6 +15,8 @@ #include #include +#include + #define AR71XX_RESET_REG_MISC_INT_STATUS 0 #define AR71XX_RESET_REG_MISC_INT_ENABLE 4 @@ -177,21 +179,3 @@ static int __init ar7240_misc_intc_of_init( IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", ar7240_misc_intc_of_init); - -void __init ath79_misc_irq_init(void __iomem *regs, int irq, - int irq_base, bool is_ar71xx) -{ - struct irq_domain *domain; - - if (is_ar71xx) - ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; - else - ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; - - domain = irq_domain_create_legacy(NULL, ATH79_MISC_IRQ_COUNT, - irq_base, 0, &misc_irq_domain_ops, regs); - if (!domain) - panic("Failed to create MISC irqdomain"); - - ath79_misc_intc_domain_init(domain, irq); -} From 34331d7beed7576acfc98e991c39738b96162499 Mon Sep 17 00:00:00 2001 From: zhangjian Date: Thu, 19 Jun 2025 09:18:29 +0800 Subject: [PATCH 483/497] smb: client: fix first command failure during re-negotiation after fabc4ed200f9, server_unresponsive add a condition to check whether client need to reconnect depending on server->lstrp. When client failed to reconnect for some time and abort connection, server->lstrp is updated for the last time. In the following scene, server->lstrp is too old. This cause next command failure in re-negotiation rather than waiting for re-negotiation done. 1. mount -t cifs -o username=Everyone,echo_internal=10 //$server_ip/export /mnt 2. ssh $server_ip "echo b > /proc/sysrq-trigger &" 3. ls /mnt 4. sleep 21s 5. ssh $server_ip "service firewalld stop" 6. ls # return EHOSTDOWN If the interval between 5 and 6 is too small, 6 may trigger sending negotiation request. Before backgrounding cifsd thread try to receive negotiation response from server in cifs_readv_from_socket, server_unresponsive may trigger cifs_reconnect which cause 6 to be failed: ls thread ---------------- smb2_negotiate server->tcpStatus = CifsInNegotiate compound_send_recv wait_for_compound_request cifsd thread ---------------- cifs_readv_from_socket server_unresponsive server->tcpStatus == CifsInNegotiate && jiffies > server->lstrp + 20s cifs_reconnect cifs_abort_connection: mid_state = MID_RETRY_NEEDED ls thread ---------------- cifs_sync_mid_result return EAGAIN smb2_negotiate return EHOSTDOWN Though server->lstrp means last server response time, it is updated in cifs_abort_connection and cifs_get_tcp_session. We can also update server->lstrp before switching into CifsInNegotiate state to avoid failure in 6. Fixes: 7ccc1465465d ("smb: client: fix hang in wait_for_response() for negproto") Acked-by: Paulo Alcantara (Red Hat) Acked-by: Meetakshi Setiya Signed-off-by: zhangjian Signed-off-by: Steve French --- fs/smb/client/connect.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index c4fb80b37738..c48869c29e15 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4199,6 +4199,7 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, return 0; } + server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; spin_unlock(&server->srv_lock); From a379a8a2a0032e12e7ef397197c9c2ad011588d6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 18 Jun 2025 18:51:40 +0200 Subject: [PATCH 484/497] smb: client: fix max_sge overflow in smb_extract_folioq_to_rdma() This fixes the following problem: [ 749.901015] [ T8673] run fstests cifs/001 at 2025-06-17 09:40:30 [ 750.346409] [ T9870] ================================================================== [ 750.346814] [ T9870] BUG: KASAN: slab-out-of-bounds in smb_set_sge+0x2cc/0x3b0 [cifs] [ 750.347330] [ T9870] Write of size 8 at addr ffff888011082890 by task xfs_io/9870 [ 750.347705] [ T9870] [ 750.348077] [ T9870] CPU: 0 UID: 0 PID: 9870 Comm: xfs_io Kdump: loaded Not tainted 6.16.0-rc2-metze.02+ #1 PREEMPT(voluntary) [ 750.348082] [ T9870] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 750.348085] [ T9870] Call Trace: [ 750.348086] [ T9870] [ 750.348088] [ T9870] dump_stack_lvl+0x76/0xa0 [ 750.348106] [ T9870] print_report+0xd1/0x640 [ 750.348116] [ T9870] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 750.348120] [ T9870] ? kasan_complete_mode_report_info+0x26/0x210 [ 750.348124] [ T9870] kasan_report+0xe7/0x130 [ 750.348128] [ T9870] ? smb_set_sge+0x2cc/0x3b0 [cifs] [ 750.348262] [ T9870] ? smb_set_sge+0x2cc/0x3b0 [cifs] [ 750.348377] [ T9870] __asan_report_store8_noabort+0x17/0x30 [ 750.348381] [ T9870] smb_set_sge+0x2cc/0x3b0 [cifs] [ 750.348496] [ T9870] smbd_post_send_iter+0x1990/0x3070 [cifs] [ 750.348625] [ T9870] ? __pfx_smbd_post_send_iter+0x10/0x10 [cifs] [ 750.348741] [ T9870] ? update_stack_state+0x2a0/0x670 [ 750.348749] [ T9870] ? cifs_flush+0x153/0x320 [cifs] [ 750.348870] [ T9870] ? cifs_flush+0x153/0x320 [cifs] [ 750.348990] [ T9870] ? update_stack_state+0x2a0/0x670 [ 750.348995] [ T9870] smbd_send+0x58c/0x9c0 [cifs] [ 750.349117] [ T9870] ? __pfx_smbd_send+0x10/0x10 [cifs] [ 750.349231] [ T9870] ? unwind_get_return_address+0x65/0xb0 [ 750.349235] [ T9870] ? __pfx_stack_trace_consume_entry+0x10/0x10 [ 750.349242] [ T9870] ? arch_stack_walk+0xa7/0x100 [ 750.349250] [ T9870] ? stack_trace_save+0x92/0xd0 [ 750.349254] [ T9870] __smb_send_rqst+0x931/0xec0 [cifs] [ 750.349374] [ T9870] ? kernel_text_address+0x173/0x190 [ 750.349379] [ T9870] ? kasan_save_stack+0x39/0x70 [ 750.349382] [ T9870] ? kasan_save_track+0x18/0x70 [ 750.349385] [ T9870] ? __kasan_slab_alloc+0x9d/0xa0 [ 750.349389] [ T9870] ? __pfx___smb_send_rqst+0x10/0x10 [cifs] [ 750.349508] [ T9870] ? smb2_mid_entry_alloc+0xb4/0x7e0 [cifs] [ 750.349626] [ T9870] ? cifs_call_async+0x277/0xb00 [cifs] [ 750.349746] [ T9870] ? cifs_issue_write+0x256/0x610 [cifs] [ 750.349867] [ T9870] ? netfs_do_issue_write+0xc2/0x340 [netfs] [ 750.349900] [ T9870] ? netfs_advance_write+0x45b/0x1270 [netfs] [ 750.349929] [ T9870] ? netfs_write_folio+0xd6c/0x1be0 [netfs] [ 750.349958] [ T9870] ? netfs_writepages+0x2e9/0xa80 [netfs] [ 750.349987] [ T9870] ? do_writepages+0x21f/0x590 [ 750.349993] [ T9870] ? filemap_fdatawrite_wbc+0xe1/0x140 [ 750.349997] [ T9870] ? entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 750.350002] [ T9870] smb_send_rqst+0x22e/0x2f0 [cifs] [ 750.350131] [ T9870] ? __pfx_smb_send_rqst+0x10/0x10 [cifs] [ 750.350255] [ T9870] ? local_clock_noinstr+0xe/0xd0 [ 750.350261] [ T9870] ? kasan_save_alloc_info+0x37/0x60 [ 750.350268] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.350271] [ T9870] ? _raw_spin_lock+0x81/0xf0 [ 750.350275] [ T9870] ? __pfx__raw_spin_lock+0x10/0x10 [ 750.350278] [ T9870] ? smb2_setup_async_request+0x293/0x580 [cifs] [ 750.350398] [ T9870] cifs_call_async+0x477/0xb00 [cifs] [ 750.350518] [ T9870] ? __pfx_smb2_writev_callback+0x10/0x10 [cifs] [ 750.350636] [ T9870] ? __pfx_cifs_call_async+0x10/0x10 [cifs] [ 750.350756] [ T9870] ? __pfx__raw_spin_lock+0x10/0x10 [ 750.350760] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.350763] [ T9870] ? __smb2_plain_req_init+0x933/0x1090 [cifs] [ 750.350891] [ T9870] smb2_async_writev+0x15ff/0x2460 [cifs] [ 750.351008] [ T9870] ? sched_clock_noinstr+0x9/0x10 [ 750.351012] [ T9870] ? local_clock_noinstr+0xe/0xd0 [ 750.351018] [ T9870] ? __pfx_smb2_async_writev+0x10/0x10 [cifs] [ 750.351144] [ T9870] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 750.351150] [ T9870] ? _raw_spin_unlock+0xe/0x40 [ 750.351154] [ T9870] ? cifs_pick_channel+0x242/0x370 [cifs] [ 750.351275] [ T9870] cifs_issue_write+0x256/0x610 [cifs] [ 750.351554] [ T9870] ? cifs_issue_write+0x256/0x610 [cifs] [ 750.351677] [ T9870] netfs_do_issue_write+0xc2/0x340 [netfs] [ 750.351710] [ T9870] netfs_advance_write+0x45b/0x1270 [netfs] [ 750.351740] [ T9870] ? rolling_buffer_append+0x12d/0x440 [netfs] [ 750.351769] [ T9870] netfs_write_folio+0xd6c/0x1be0 [netfs] [ 750.351798] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.351804] [ T9870] netfs_writepages+0x2e9/0xa80 [netfs] [ 750.351835] [ T9870] ? __pfx_netfs_writepages+0x10/0x10 [netfs] [ 750.351864] [ T9870] ? exit_files+0xab/0xe0 [ 750.351867] [ T9870] ? do_exit+0x148f/0x2980 [ 750.351871] [ T9870] ? do_group_exit+0xb5/0x250 [ 750.351874] [ T9870] ? arch_do_signal_or_restart+0x92/0x630 [ 750.351879] [ T9870] ? exit_to_user_mode_loop+0x98/0x170 [ 750.351882] [ T9870] ? do_syscall_64+0x2cf/0xd80 [ 750.351886] [ T9870] ? entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 750.351890] [ T9870] do_writepages+0x21f/0x590 [ 750.351894] [ T9870] ? __pfx_do_writepages+0x10/0x10 [ 750.351897] [ T9870] filemap_fdatawrite_wbc+0xe1/0x140 [ 750.351901] [ T9870] __filemap_fdatawrite_range+0xba/0x100 [ 750.351904] [ T9870] ? __pfx___filemap_fdatawrite_range+0x10/0x10 [ 750.351912] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.351916] [ T9870] filemap_write_and_wait_range+0x7d/0xf0 [ 750.351920] [ T9870] cifs_flush+0x153/0x320 [cifs] [ 750.352042] [ T9870] filp_flush+0x107/0x1a0 [ 750.352046] [ T9870] filp_close+0x14/0x30 [ 750.352049] [ T9870] put_files_struct.part.0+0x126/0x2a0 [ 750.352053] [ T9870] ? __pfx__raw_spin_lock+0x10/0x10 [ 750.352058] [ T9870] exit_files+0xab/0xe0 [ 750.352061] [ T9870] do_exit+0x148f/0x2980 [ 750.352065] [ T9870] ? __pfx_do_exit+0x10/0x10 [ 750.352069] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.352072] [ T9870] ? _raw_spin_lock_irq+0x8a/0xf0 [ 750.352076] [ T9870] do_group_exit+0xb5/0x250 [ 750.352080] [ T9870] get_signal+0x22d3/0x22e0 [ 750.352086] [ T9870] ? __pfx_get_signal+0x10/0x10 [ 750.352089] [ T9870] ? fpregs_assert_state_consistent+0x68/0x100 [ 750.352101] [ T9870] ? folio_add_lru+0xda/0x120 [ 750.352105] [ T9870] arch_do_signal_or_restart+0x92/0x630 [ 750.352109] [ T9870] ? __pfx_arch_do_signal_or_restart+0x10/0x10 [ 750.352115] [ T9870] exit_to_user_mode_loop+0x98/0x170 [ 750.352118] [ T9870] do_syscall_64+0x2cf/0xd80 [ 750.352123] [ T9870] ? __kasan_check_read+0x11/0x20 [ 750.352126] [ T9870] ? count_memcg_events+0x1b4/0x420 [ 750.352132] [ T9870] ? handle_mm_fault+0x148/0x690 [ 750.352136] [ T9870] ? _raw_spin_lock_irq+0x8a/0xf0 [ 750.352140] [ T9870] ? __kasan_check_read+0x11/0x20 [ 750.352143] [ T9870] ? fpregs_assert_state_consistent+0x68/0x100 [ 750.352146] [ T9870] ? irqentry_exit_to_user_mode+0x2e/0x250 [ 750.352151] [ T9870] ? irqentry_exit+0x43/0x50 [ 750.352154] [ T9870] ? exc_page_fault+0x75/0xe0 [ 750.352160] [ T9870] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 750.352163] [ T9870] RIP: 0033:0x7858c94ab6e2 [ 750.352167] [ T9870] Code: Unable to access opcode bytes at 0x7858c94ab6b8. [ 750.352175] [ T9870] RSP: 002b:00007858c9248ce8 EFLAGS: 00000246 ORIG_RAX: 0000000000000022 [ 750.352179] [ T9870] RAX: fffffffffffffdfe RBX: 00007858c92496c0 RCX: 00007858c94ab6e2 [ 750.352182] [ T9870] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 750.352184] [ T9870] RBP: 00007858c9248d10 R08: 0000000000000000 R09: 0000000000000000 [ 750.352185] [ T9870] R10: 0000000000000000 R11: 0000000000000246 R12: fffffffffffffde0 [ 750.352187] [ T9870] R13: 0000000000000020 R14: 0000000000000002 R15: 00007ffc072d2230 [ 750.352191] [ T9870] [ 750.352195] [ T9870] [ 750.395206] [ T9870] Allocated by task 9870 on cpu 0 at 750.346406s: [ 750.395523] [ T9870] kasan_save_stack+0x39/0x70 [ 750.395532] [ T9870] kasan_save_track+0x18/0x70 [ 750.395536] [ T9870] kasan_save_alloc_info+0x37/0x60 [ 750.395539] [ T9870] __kasan_slab_alloc+0x9d/0xa0 [ 750.395543] [ T9870] kmem_cache_alloc_noprof+0x13c/0x3f0 [ 750.395548] [ T9870] mempool_alloc_slab+0x15/0x20 [ 750.395553] [ T9870] mempool_alloc_noprof+0x135/0x340 [ 750.395557] [ T9870] smbd_post_send_iter+0x63e/0x3070 [cifs] [ 750.395694] [ T9870] smbd_send+0x58c/0x9c0 [cifs] [ 750.395819] [ T9870] __smb_send_rqst+0x931/0xec0 [cifs] [ 750.395950] [ T9870] smb_send_rqst+0x22e/0x2f0 [cifs] [ 750.396081] [ T9870] cifs_call_async+0x477/0xb00 [cifs] [ 750.396232] [ T9870] smb2_async_writev+0x15ff/0x2460 [cifs] [ 750.396359] [ T9870] cifs_issue_write+0x256/0x610 [cifs] [ 750.396492] [ T9870] netfs_do_issue_write+0xc2/0x340 [netfs] [ 750.396544] [ T9870] netfs_advance_write+0x45b/0x1270 [netfs] [ 750.396576] [ T9870] netfs_write_folio+0xd6c/0x1be0 [netfs] [ 750.396608] [ T9870] netfs_writepages+0x2e9/0xa80 [netfs] [ 750.396639] [ T9870] do_writepages+0x21f/0x590 [ 750.396643] [ T9870] filemap_fdatawrite_wbc+0xe1/0x140 [ 750.396647] [ T9870] __filemap_fdatawrite_range+0xba/0x100 [ 750.396651] [ T9870] filemap_write_and_wait_range+0x7d/0xf0 [ 750.396656] [ T9870] cifs_flush+0x153/0x320 [cifs] [ 750.396787] [ T9870] filp_flush+0x107/0x1a0 [ 750.396791] [ T9870] filp_close+0x14/0x30 [ 750.396795] [ T9870] put_files_struct.part.0+0x126/0x2a0 [ 750.396800] [ T9870] exit_files+0xab/0xe0 [ 750.396803] [ T9870] do_exit+0x148f/0x2980 [ 750.396808] [ T9870] do_group_exit+0xb5/0x250 [ 750.396813] [ T9870] get_signal+0x22d3/0x22e0 [ 750.396817] [ T9870] arch_do_signal_or_restart+0x92/0x630 [ 750.396822] [ T9870] exit_to_user_mode_loop+0x98/0x170 [ 750.396827] [ T9870] do_syscall_64+0x2cf/0xd80 [ 750.396832] [ T9870] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 750.396836] [ T9870] [ 750.397150] [ T9870] The buggy address belongs to the object at ffff888011082800 which belongs to the cache smbd_request_0000000008f3bd7b of size 144 [ 750.397798] [ T9870] The buggy address is located 0 bytes to the right of allocated 144-byte region [ffff888011082800, ffff888011082890) [ 750.398469] [ T9870] [ 750.398800] [ T9870] The buggy address belongs to the physical page: [ 750.399141] [ T9870] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x11082 [ 750.399148] [ T9870] flags: 0xfffffc0000000(node=0|zone=1|lastcpupid=0x1fffff) [ 750.399155] [ T9870] page_type: f5(slab) [ 750.399161] [ T9870] raw: 000fffffc0000000 ffff888022d65640 dead000000000122 0000000000000000 [ 750.399165] [ T9870] raw: 0000000000000000 0000000080100010 00000000f5000000 0000000000000000 [ 750.399169] [ T9870] page dumped because: kasan: bad access detected [ 750.399172] [ T9870] [ 750.399505] [ T9870] Memory state around the buggy address: [ 750.399863] [ T9870] ffff888011082780: fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 750.400247] [ T9870] ffff888011082800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 750.400618] [ T9870] >ffff888011082880: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 750.400982] [ T9870] ^ [ 750.401370] [ T9870] ffff888011082900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 750.401774] [ T9870] ffff888011082980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 750.402171] [ T9870] ================================================================== [ 750.402696] [ T9870] Disabling lock debugging due to kernel taint [ 750.403202] [ T9870] BUG: unable to handle page fault for address: ffff8880110a2000 [ 750.403797] [ T9870] #PF: supervisor write access in kernel mode [ 750.404204] [ T9870] #PF: error_code(0x0003) - permissions violation [ 750.404581] [ T9870] PGD 5ce01067 P4D 5ce01067 PUD 5ce02067 PMD 78aa063 PTE 80000000110a2021 [ 750.404969] [ T9870] Oops: Oops: 0003 [#1] SMP KASAN PTI [ 750.405394] [ T9870] CPU: 0 UID: 0 PID: 9870 Comm: xfs_io Kdump: loaded Tainted: G B 6.16.0-rc2-metze.02+ #1 PREEMPT(voluntary) [ 750.406510] [ T9870] Tainted: [B]=BAD_PAGE [ 750.406967] [ T9870] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 750.407440] [ T9870] RIP: 0010:smb_set_sge+0x15c/0x3b0 [cifs] [ 750.408065] [ T9870] Code: 48 83 f8 ff 0f 84 b0 00 00 00 48 ba 00 00 00 00 00 fc ff df 4c 89 e1 48 c1 e9 03 80 3c 11 00 0f 85 69 01 00 00 49 8d 7c 24 08 <49> 89 04 24 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 0f [ 750.409283] [ T9870] RSP: 0018:ffffc90005e2e758 EFLAGS: 00010246 [ 750.409803] [ T9870] RAX: ffff888036c53400 RBX: ffffc90005e2e878 RCX: 1ffff11002214400 [ 750.410323] [ T9870] RDX: dffffc0000000000 RSI: dffffc0000000000 RDI: ffff8880110a2008 [ 750.411217] [ T9870] RBP: ffffc90005e2e798 R08: 0000000000000001 R09: 0000000000000400 [ 750.411770] [ T9870] R10: ffff888011082800 R11: 0000000000000000 R12: ffff8880110a2000 [ 750.412325] [ T9870] R13: 0000000000000000 R14: ffffc90005e2e888 R15: ffff88801a4b6000 [ 750.412901] [ T9870] FS: 0000000000000000(0000) GS:ffff88812bc68000(0000) knlGS:0000000000000000 [ 750.413477] [ T9870] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 750.414077] [ T9870] CR2: ffff8880110a2000 CR3: 000000005b0a6005 CR4: 00000000000726f0 [ 750.414654] [ T9870] Call Trace: [ 750.415211] [ T9870] [ 750.415748] [ T9870] smbd_post_send_iter+0x1990/0x3070 [cifs] [ 750.416449] [ T9870] ? __pfx_smbd_post_send_iter+0x10/0x10 [cifs] [ 750.417128] [ T9870] ? update_stack_state+0x2a0/0x670 [ 750.417685] [ T9870] ? cifs_flush+0x153/0x320 [cifs] [ 750.418380] [ T9870] ? cifs_flush+0x153/0x320 [cifs] [ 750.419055] [ T9870] ? update_stack_state+0x2a0/0x670 [ 750.419624] [ T9870] smbd_send+0x58c/0x9c0 [cifs] [ 750.420297] [ T9870] ? __pfx_smbd_send+0x10/0x10 [cifs] [ 750.420936] [ T9870] ? unwind_get_return_address+0x65/0xb0 [ 750.421456] [ T9870] ? __pfx_stack_trace_consume_entry+0x10/0x10 [ 750.421954] [ T9870] ? arch_stack_walk+0xa7/0x100 [ 750.422460] [ T9870] ? stack_trace_save+0x92/0xd0 [ 750.422948] [ T9870] __smb_send_rqst+0x931/0xec0 [cifs] [ 750.423579] [ T9870] ? kernel_text_address+0x173/0x190 [ 750.424056] [ T9870] ? kasan_save_stack+0x39/0x70 [ 750.424813] [ T9870] ? kasan_save_track+0x18/0x70 [ 750.425323] [ T9870] ? __kasan_slab_alloc+0x9d/0xa0 [ 750.425831] [ T9870] ? __pfx___smb_send_rqst+0x10/0x10 [cifs] [ 750.426548] [ T9870] ? smb2_mid_entry_alloc+0xb4/0x7e0 [cifs] [ 750.427231] [ T9870] ? cifs_call_async+0x277/0xb00 [cifs] [ 750.427882] [ T9870] ? cifs_issue_write+0x256/0x610 [cifs] [ 750.428909] [ T9870] ? netfs_do_issue_write+0xc2/0x340 [netfs] [ 750.429425] [ T9870] ? netfs_advance_write+0x45b/0x1270 [netfs] [ 750.429882] [ T9870] ? netfs_write_folio+0xd6c/0x1be0 [netfs] [ 750.430345] [ T9870] ? netfs_writepages+0x2e9/0xa80 [netfs] [ 750.430809] [ T9870] ? do_writepages+0x21f/0x590 [ 750.431239] [ T9870] ? filemap_fdatawrite_wbc+0xe1/0x140 [ 750.431652] [ T9870] ? entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 750.432041] [ T9870] smb_send_rqst+0x22e/0x2f0 [cifs] [ 750.432586] [ T9870] ? __pfx_smb_send_rqst+0x10/0x10 [cifs] [ 750.433108] [ T9870] ? local_clock_noinstr+0xe/0xd0 [ 750.433482] [ T9870] ? kasan_save_alloc_info+0x37/0x60 [ 750.433855] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.434214] [ T9870] ? _raw_spin_lock+0x81/0xf0 [ 750.434561] [ T9870] ? __pfx__raw_spin_lock+0x10/0x10 [ 750.434903] [ T9870] ? smb2_setup_async_request+0x293/0x580 [cifs] [ 750.435394] [ T9870] cifs_call_async+0x477/0xb00 [cifs] [ 750.435892] [ T9870] ? __pfx_smb2_writev_callback+0x10/0x10 [cifs] [ 750.436388] [ T9870] ? __pfx_cifs_call_async+0x10/0x10 [cifs] [ 750.436881] [ T9870] ? __pfx__raw_spin_lock+0x10/0x10 [ 750.437237] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.437579] [ T9870] ? __smb2_plain_req_init+0x933/0x1090 [cifs] [ 750.438062] [ T9870] smb2_async_writev+0x15ff/0x2460 [cifs] [ 750.438557] [ T9870] ? sched_clock_noinstr+0x9/0x10 [ 750.438906] [ T9870] ? local_clock_noinstr+0xe/0xd0 [ 750.439293] [ T9870] ? __pfx_smb2_async_writev+0x10/0x10 [cifs] [ 750.439786] [ T9870] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 750.440143] [ T9870] ? _raw_spin_unlock+0xe/0x40 [ 750.440495] [ T9870] ? cifs_pick_channel+0x242/0x370 [cifs] [ 750.440989] [ T9870] cifs_issue_write+0x256/0x610 [cifs] [ 750.441492] [ T9870] ? cifs_issue_write+0x256/0x610 [cifs] [ 750.441987] [ T9870] netfs_do_issue_write+0xc2/0x340 [netfs] [ 750.442387] [ T9870] netfs_advance_write+0x45b/0x1270 [netfs] [ 750.442969] [ T9870] ? rolling_buffer_append+0x12d/0x440 [netfs] [ 750.443376] [ T9870] netfs_write_folio+0xd6c/0x1be0 [netfs] [ 750.443768] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.444145] [ T9870] netfs_writepages+0x2e9/0xa80 [netfs] [ 750.444541] [ T9870] ? __pfx_netfs_writepages+0x10/0x10 [netfs] [ 750.444936] [ T9870] ? exit_files+0xab/0xe0 [ 750.445312] [ T9870] ? do_exit+0x148f/0x2980 [ 750.445672] [ T9870] ? do_group_exit+0xb5/0x250 [ 750.446028] [ T9870] ? arch_do_signal_or_restart+0x92/0x630 [ 750.446402] [ T9870] ? exit_to_user_mode_loop+0x98/0x170 [ 750.446762] [ T9870] ? do_syscall_64+0x2cf/0xd80 [ 750.447132] [ T9870] ? entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 750.447499] [ T9870] do_writepages+0x21f/0x590 [ 750.447859] [ T9870] ? __pfx_do_writepages+0x10/0x10 [ 750.448236] [ T9870] filemap_fdatawrite_wbc+0xe1/0x140 [ 750.448595] [ T9870] __filemap_fdatawrite_range+0xba/0x100 [ 750.448953] [ T9870] ? __pfx___filemap_fdatawrite_range+0x10/0x10 [ 750.449336] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.449697] [ T9870] filemap_write_and_wait_range+0x7d/0xf0 [ 750.450062] [ T9870] cifs_flush+0x153/0x320 [cifs] [ 750.450592] [ T9870] filp_flush+0x107/0x1a0 [ 750.450952] [ T9870] filp_close+0x14/0x30 [ 750.451322] [ T9870] put_files_struct.part.0+0x126/0x2a0 [ 750.451678] [ T9870] ? __pfx__raw_spin_lock+0x10/0x10 [ 750.452033] [ T9870] exit_files+0xab/0xe0 [ 750.452401] [ T9870] do_exit+0x148f/0x2980 [ 750.452751] [ T9870] ? __pfx_do_exit+0x10/0x10 [ 750.453109] [ T9870] ? __kasan_check_write+0x14/0x30 [ 750.453459] [ T9870] ? _raw_spin_lock_irq+0x8a/0xf0 [ 750.453787] [ T9870] do_group_exit+0xb5/0x250 [ 750.454082] [ T9870] get_signal+0x22d3/0x22e0 [ 750.454406] [ T9870] ? __pfx_get_signal+0x10/0x10 [ 750.454709] [ T9870] ? fpregs_assert_state_consistent+0x68/0x100 [ 750.455031] [ T9870] ? folio_add_lru+0xda/0x120 [ 750.455347] [ T9870] arch_do_signal_or_restart+0x92/0x630 [ 750.455656] [ T9870] ? __pfx_arch_do_signal_or_restart+0x10/0x10 [ 750.455967] [ T9870] exit_to_user_mode_loop+0x98/0x170 [ 750.456282] [ T9870] do_syscall_64+0x2cf/0xd80 [ 750.456591] [ T9870] ? __kasan_check_read+0x11/0x20 [ 750.456897] [ T9870] ? count_memcg_events+0x1b4/0x420 [ 750.457280] [ T9870] ? handle_mm_fault+0x148/0x690 [ 750.457616] [ T9870] ? _raw_spin_lock_irq+0x8a/0xf0 [ 750.457925] [ T9870] ? __kasan_check_read+0x11/0x20 [ 750.458297] [ T9870] ? fpregs_assert_state_consistent+0x68/0x100 [ 750.458672] [ T9870] ? irqentry_exit_to_user_mode+0x2e/0x250 [ 750.459191] [ T9870] ? irqentry_exit+0x43/0x50 [ 750.459600] [ T9870] ? exc_page_fault+0x75/0xe0 [ 750.460130] [ T9870] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 750.460570] [ T9870] RIP: 0033:0x7858c94ab6e2 [ 750.461206] [ T9870] Code: Unable to access opcode bytes at 0x7858c94ab6b8. [ 750.461780] [ T9870] RSP: 002b:00007858c9248ce8 EFLAGS: 00000246 ORIG_RAX: 0000000000000022 [ 750.462327] [ T9870] RAX: fffffffffffffdfe RBX: 00007858c92496c0 RCX: 00007858c94ab6e2 [ 750.462653] [ T9870] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 750.462969] [ T9870] RBP: 00007858c9248d10 R08: 0000000000000000 R09: 0000000000000000 [ 750.463290] [ T9870] R10: 0000000000000000 R11: 0000000000000246 R12: fffffffffffffde0 [ 750.463640] [ T9870] R13: 0000000000000020 R14: 0000000000000002 R15: 00007ffc072d2230 [ 750.463965] [ T9870] [ 750.464285] [ T9870] Modules linked in: siw ib_uverbs ccm cmac nls_utf8 cifs cifs_arc4 nls_ucs2_utils rdma_cm iw_cm ib_cm ib_core cifs_md4 netfs softdog vboxsf vboxguest cpuid intel_rapl_msr intel_rapl_common intel_uncore_frequency_common intel_pmc_core pmt_telemetry pmt_class intel_pmc_ssram_telemetry intel_vsec polyval_clmulni ghash_clmulni_intel sha1_ssse3 aesni_intel rapl i2c_piix4 i2c_smbus joydev input_leds mac_hid sunrpc binfmt_misc kvm_intel kvm irqbypass sch_fq_codel efi_pstore nfnetlink vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock vmw_vmci dmi_sysfs ip_tables x_tables autofs4 hid_generic vboxvideo usbhid drm_vram_helper psmouse vga16fb vgastate drm_ttm_helper serio_raw hid ahci libahci ttm pata_acpi video wmi [last unloaded: vboxguest] [ 750.467127] [ T9870] CR2: ffff8880110a2000 cc: Tom Talpey cc: linux-cifs@vger.kernel.org Reviewed-by: David Howells Reviewed-by: Tom Talpey Fixes: c45ebd636c32 ("cifs: Provide the capability to extract from ITER_FOLIOQ to RDMA SGEs") Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5ae847919da5..cbc85bca006f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2589,13 +2589,14 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, size_t fsize = folioq_folio_size(folioq, slot); if (offset < fsize) { - size_t part = umin(maxsize - ret, fsize - offset); + size_t part = umin(maxsize, fsize - offset); if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) return -EIO; offset += part; ret += part; + maxsize -= part; } if (offset >= fsize) { @@ -2610,7 +2611,7 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, slot = 0; } } - } while (rdma->nr_sge < rdma->max_sge || maxsize > 0); + } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); iter->folioq = folioq; iter->folioq_slot = slot; From 2c4fd3d141465ef1afc8318c95e7b916d9a8c49c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 18 Jun 2025 16:39:47 +0100 Subject: [PATCH 485/497] cifs: Fix prepare_write to negotiate wsize if needed Fix cifs_prepare_write() to negotiate the wsize if it is unset. Reviewed-by: Shyam Prasad N Reviewed-by: Bharath SM Signed-off-by: David Howells cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/file.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9835672267d2..e9212da32f01 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -52,6 +52,7 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) struct netfs_io_stream *stream = &req->rreq.io_streams[subreq->stream_nr]; struct TCP_Server_Info *server; struct cifsFileInfo *open_file = req->cfile; + struct cifs_sb_info *cifs_sb = CIFS_SB(wdata->rreq->inode->i_sb); size_t wsize = req->rreq.wsize; int rc; @@ -63,6 +64,10 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); wdata->server = server; + if (cifs_sb->ctx->wsize == 0) + cifs_negotiate_wsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); + retry: if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, false); @@ -160,10 +165,9 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) { + if (cifs_sb->ctx->rsize == 0) cifs_negotiate_rsize(server, cifs_sb->ctx, tlink_tcon(req->cfile->tlink)); - } rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); From a2182743a8b4969481f64aec4908ff162e8a206c Mon Sep 17 00:00:00 2001 From: Paul Aurich Date: Wed, 20 Nov 2024 08:01:54 -0800 Subject: [PATCH 486/497] smb: Log an error when close_all_cached_dirs fails Under low-memory conditions, close_all_cached_dirs() can't move the dentries to a separate list to dput() them once the locks are dropped. This will result in a "Dentry still in use" error, so add an error message that makes it clear this is what happened: [ 495.281119] CIFS: VFS: \\otters.example.com\share Out of memory while dropping dentries [ 495.281595] ------------[ cut here ]------------ [ 495.281887] BUG: Dentry ffff888115531138{i=78,n=/} still in use (2) [unmount of cifs cifs] [ 495.282391] WARNING: CPU: 1 PID: 2329 at fs/dcache.c:1536 umount_check+0xc8/0xf0 Also, bail out of looping through all tcons as soon as a single allocation fails, since we're already in trouble, and kmalloc() attempts for subseqeuent tcons are likely to fail just like the first one did. Signed-off-by: Paul Aurich Acked-by: Bharath SM Suggested-by: Ruben Devos Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5200a0f3cafc..368e870624da 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -509,8 +509,17 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC); - if (tmp_list == NULL) - break; + if (tmp_list == NULL) { + /* + * If the malloc() fails, we won't drop all + * dentries, and unmounting is likely to trigger + * a 'Dentry still in use' error. + */ + cifs_tcon_dbg(VFS, "Out of memory while dropping dentries\n"); + spin_unlock(&cfids->cfid_list_lock); + spin_unlock(&cifs_sb->tlink_tree_lock); + goto done; + } spin_lock(&cfid->fid_lock); tmp_list->dentry = cfid->dentry; cfid->dentry = NULL; @@ -522,6 +531,7 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); +done: list_for_each_entry_safe(tmp_list, q, &entry, entry) { list_del(&tmp_list->entry); dput(tmp_list->dentry); From 4eb11a34b72c86d559f437fdadc47e512bba41d2 Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Thu, 19 Jun 2025 21:05:32 +0530 Subject: [PATCH 487/497] smb: Use loff_t for directory position in cached_dirents Change the pos field in struct cached_dirents from int to loff_t to support large directory offsets. This avoids overflow and matches kernel conventions for directory positions. Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Bharath SM Signed-off-by: Steve French --- fs/smb/client/cached_dir.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index bc8a812ff95f..a28f7cae3caa 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -26,7 +26,7 @@ struct cached_dirents { * open file instance. */ struct mutex de_mutex; - int pos; /* Expected ctx->pos */ + loff_t pos; /* Expected ctx->pos */ struct list_head entries; }; From 4d360cfe8cbac3b62bd9e1df9889bde8a9d5b1d7 Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Thu, 19 Jun 2025 21:05:33 +0530 Subject: [PATCH 488/497] smb: minor fix to use sizeof to initialize flags_string buffer Replaced hardcoded length with sizeof(flags_string). Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Bharath SM Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c0196be0e65f..3fdf75737d43 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1105,7 +1105,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, if ((count < 1) || (count > 11)) return -EINVAL; - memset(flags_string, 0, 12); + memset(flags_string, 0, sizeof(flags_string)); if (copy_from_user(flags_string, buffer, count)) return -EFAULT; From 27e9d5d021dbaa1211836d07a240078bf84b284e Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Thu, 19 Jun 2025 21:05:34 +0530 Subject: [PATCH 489/497] smb: minor fix to use SMB2_NTLMV2_SESSKEY_SIZE for auth_key size Replaced hardcoded value 16 with SMB2_NTLMV2_SESSKEY_SIZE in the auth_key definition and memcpy call. Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Bharath SM Signed-off-by: Steve French --- fs/smb/client/cifs_ioctl.h | 2 +- fs/smb/client/ioctl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 26327442e383..b51ce64fcccf 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -61,7 +61,7 @@ struct smb_query_info { struct smb3_key_debug_info { __u64 Suid; __u16 cipher_type; - __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */ + __u8 auth_key[SMB2_NTLMV2_SESSKEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; } __packed; diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 56439da4f119..0a9935ce05a5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -506,7 +506,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) le16_to_cpu(tcon->ses->server->cipher_type); pkey_inf.Suid = tcon->ses->Suid; memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response, - 16 /* SMB2_NTLMV2_SESSKEY_SIZE */); + SMB2_NTLMV2_SESSKEY_SIZE); memcpy(pkey_inf.smb3decryptionkey, tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); memcpy(pkey_inf.smb3encryptionkey, From a6c23dac756b9541b33aa3bcd30f464df2879209 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 16 Jun 2025 07:51:36 -0500 Subject: [PATCH 490/497] i2c: k1: check for transfer error If spacemit_i2c_xfer_msg() times out waiting for a message transfer to complete, or if the hardware reports an error, it returns a negative error code (-ETIMEDOUT, -EAGAIN, -ENXIO. or -EIO). The sole caller of spacemit_i2c_xfer_msg() is spacemit_i2c_xfer(), which is the i2c_algorithm->xfer callback function. It currently does not save the value returned by spacemit_i2c_xfer_msg(). The result is that transfer errors go unreported, and a caller has no indication anything is wrong. When this code was out for review, the return value *was* checked in early versions. But for some reason, that assignment got dropped between versions 5 and 6 of the series, perhaps related to reworking the code to merge spacemit_i2c_xfer_core() into spacemit_i2c_xfer(). Simply assigning the value returned to "ret" fixes the problem. Fixes: 5ea558473fa31 ("i2c: spacemit: add support for SpacemiT K1 SoC") Signed-off-by: Alex Elder Cc: # v6.15+ Reviewed-by: Troy Mitchell Link: https://lore.kernel.org/r/20250616125137.1555453-1-elder@riscstar.com Signed-off-by: Andi Shyti Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-k1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-k1.c b/drivers/i2c/busses/i2c-k1.c index 5965b4cf6220..b68a21fff0b5 100644 --- a/drivers/i2c/busses/i2c-k1.c +++ b/drivers/i2c/busses/i2c-k1.c @@ -477,7 +477,7 @@ static int spacemit_i2c_xfer(struct i2c_adapter *adapt, struct i2c_msg *msgs, in ret = spacemit_i2c_wait_bus_idle(i2c); if (!ret) - spacemit_i2c_xfer_msg(i2c); + ret = spacemit_i2c_xfer_msg(i2c); else if (ret < 0) dev_dbg(i2c->dev, "i2c transfer error: %d\n", ret); else From 86731a2a651e58953fc949573895f2fa6d456841 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Jun 2025 13:30:08 -0700 Subject: [PATCH 491/497] Linux 6.16-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ba0827a1fccd..f884dfe10246 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* From 999fb9d51f939ee23cbb9313ae558d29d6987804 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 17 Jun 2025 14:20:12 +0200 Subject: [PATCH 492/497] ASoC: qcom: sm8250: Fix possibly undefined reference With CONFIG_SND_SOC_SM8250=y and CONFIG_SND_SOC_QCOM_OFFLOAD_UTILS=m selected in kconfig, the build will fail due to trying to link against a symbol only found in the module. aarch64-linux-gnu-ld: sound/soc/qcom/sm8250.o: in function `sm8250_snd_exit': sound/soc/qcom/sm8250.c:52:(.text+0x210): undefined reference to `qcom_snd_usb_offload_jack_remove' Fix this by declaring the dependency that forces CONFIG_SND_SOC_SM8250=m when CONFIG_SND_SOC_QCOM_OFFLOAD_UTILS is =m. Reported-by: Matthew Croughan Fixes: 1b8d0d87b934 ("ASoC: qcom: qdsp6: Add headphone jack for offload connection status") Signed-off-by: Luca Weiss Link: https://patch.msgid.link/20250617-snd-sm8250-dep-fix-v1-1-879af8906ec4@fairphone.com Signed-off-by: Mark Brown --- sound/soc/qcom/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index e86b4a03dd61..3d9ba13ee1e5 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -186,6 +186,7 @@ config SND_SOC_SM8250 tristate "SoC Machine driver for SM8250 boards" depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK + depends on SND_SOC_QCOM_OFFLOAD_UTILS || !SND_SOC_QCOM_OFFLOAD_UTILS select SND_SOC_QDSP6 select SND_SOC_QCOM_COMMON select SND_SOC_QCOM_SDW From 7186b81807b4a08f8bf834b6bdc72d6ed8ba1587 Mon Sep 17 00:00:00 2001 From: Yuzuru10 Date: Sun, 22 Jun 2025 22:58:00 +0000 Subject: [PATCH 493/497] ASoC: amd: yc: add quirk for Acer Nitro ANV15-41 internal mic This patch adds DMI-based quirk for the Acer Nitro ANV15-41, allowing the internal microphone to be detected correctly on machines with "RB" as board vendor. Signed-off-by: Yuzuru Link: https://patch.msgid.link/20250622225754.20856-1-yuzuru_10@proton.me Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 98022e5fd428..499f9f7c76ee 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -353,6 +353,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "83Q3"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "RB"), + DMI_MATCH(DMI_PRODUCT_NAME, "Nitro ANV15-41"), + } + }, { .driver_data = &acp6x_card, .matches = { From bf39286adc5e10ce3e32eb86ad316ae56f3b52a0 Mon Sep 17 00:00:00 2001 From: Oliver Schramm Date: Sun, 22 Jun 2025 00:30:01 +0200 Subject: [PATCH 494/497] ASoC: amd: yc: Add DMI quirk for Lenovo IdeaPad Slim 5 15 It's smaller brother has already received the patch to enable the microphone, now add it too to the DMI quirk table. Cc: stable@vger.kernel.org Signed-off-by: Oliver Schramm Link: https://patch.msgid.link/20250621223000.11817-2-oliver.schramm97@gmail.com Signed-off-by: Mark Brown --- sound/soc/amd/yc/acp6x-mach.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 499f9f7c76ee..97e340140d0c 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -367,6 +367,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "83J2"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "83J3"), + } + }, { .driver_data = &acp6x_card, .matches = { From dc6458ed95e40146699f9c523e34cb13ff127170 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Mon, 23 Jun 2025 14:14:55 +0530 Subject: [PATCH 495/497] ASoC: amd: ps: fix for soundwire failures during hibernation exit sequence During the hibernate entry sequence, ACP registers will be reset to default values and acp ip will be completely powered off including acp SoundWire pads. During resume sequence, if acp SoundWire pad keeper enable register is not restored along with pad pulldown control register value, then SoundWire manager links won't be powered on correctly results in peripheral register access failures and completely audio function is broken. Add code to store the acp SoundWire pad keeper enable register and acp pad pulldown ctrl register values before entering into suspend state and restore the register values during resume sequence based on condition check for acp SoundWire pad keeper enable register for ACP6.3, ACP7.0 & ACP7.1 platforms. Fixes: 491628388005 ("ASoC: amd: ps: add callback functions for acp pci driver pm ops") Signed-off-by: Vijendar Mukunda Link: https://patch.msgid.link/20250623084630.3100279-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/ps/acp63.h | 4 ++++ sound/soc/amd/ps/ps-common.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h index 85feae45c44c..d7c994e26e4d 100644 --- a/sound/soc/amd/ps/acp63.h +++ b/sound/soc/amd/ps/acp63.h @@ -334,6 +334,8 @@ struct acp_hw_ops { * @addr: pci ioremap address * @reg_range: ACP reigister range * @acp_rev: ACP PCI revision id + * @acp_sw_pad_keeper_en: store acp SoundWire pad keeper enable register value + * @acp_pad_pulldown_ctrl: store acp pad pulldown control register value * @acp63_sdw0-dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire * manager-SW0 instance * @acp63_sdw_dma_intr_stat: DMA interrupt status array for ACP6.3 platform SoundWire @@ -367,6 +369,8 @@ struct acp63_dev_data { u32 addr; u32 reg_range; u32 acp_rev; + u32 acp_sw_pad_keeper_en; + u32 acp_pad_pulldown_ctrl; u16 acp63_sdw0_dma_intr_stat[ACP63_SDW0_DMA_MAX_STREAMS]; u16 acp63_sdw1_dma_intr_stat[ACP63_SDW1_DMA_MAX_STREAMS]; u16 acp70_sdw0_dma_intr_stat[ACP70_SDW0_DMA_MAX_STREAMS]; diff --git a/sound/soc/amd/ps/ps-common.c b/sound/soc/amd/ps/ps-common.c index 1c89fb5fe1da..7b4966b75dc6 100644 --- a/sound/soc/amd/ps/ps-common.c +++ b/sound/soc/amd/ps/ps-common.c @@ -160,6 +160,8 @@ static int __maybe_unused snd_acp63_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ -197,6 +199,7 @@ static int __maybe_unused snd_acp63_runtime_resume(struct device *dev) static int __maybe_unused snd_acp63_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -209,6 +212,12 @@ static int __maybe_unused snd_acp63_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } @@ -408,6 +417,8 @@ static int __maybe_unused snd_acp70_suspend(struct device *dev) adata = dev_get_drvdata(dev); if (adata->is_sdw_dev) { + adata->acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + adata->acp_pad_pulldown_ctrl = readl(adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); adata->sdw_en_stat = check_acp_sdw_enable_status(adata); if (adata->sdw_en_stat) { writel(1, adata->acp63_base + ACP_ZSC_DSP_CTRL); @@ -445,6 +456,7 @@ static int __maybe_unused snd_acp70_runtime_resume(struct device *dev) static int __maybe_unused snd_acp70_resume(struct device *dev) { struct acp63_dev_data *adata; + u32 acp_sw_pad_keeper_en; int ret; adata = dev_get_drvdata(dev); @@ -459,6 +471,12 @@ static int __maybe_unused snd_acp70_resume(struct device *dev) if (ret) dev_err(dev, "ACP init failed\n"); + acp_sw_pad_keeper_en = readl(adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + dev_dbg(dev, "ACP_SW0_PAD_KEEPER_EN:0x%x\n", acp_sw_pad_keeper_en); + if (!acp_sw_pad_keeper_en) { + writel(adata->acp_sw_pad_keeper_en, adata->acp63_base + ACP_SW0_PAD_KEEPER_EN); + writel(adata->acp_pad_pulldown_ctrl, adata->acp63_base + ACP_PAD_PULLDOWN_CTRL); + } return ret; } From ff21a6ec0f27c126db0a86d96751bd6e5d1d9874 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Tue, 24 Jun 2025 02:59:28 +0000 Subject: [PATCH 496/497] ASoC: rt721-sdca: fix boost gain calculation error Fix the boost gain calculation error in rt721_sdca_set_gain_get. This patch is specific for "FU33 Boost Volume". Signed-off-by: Jack Yu Link: https://patch.msgid.link/1b18fcde41c64d6fa85451d523c0434a@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt721-sdca.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index 1c9f32e405cf..ba080957e933 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -430,6 +430,7 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, unsigned int read_l, read_r, ctl_l = 0, ctl_r = 0; unsigned int adc_vol_flag = 0; const unsigned int interval_offset = 0xc0; + const unsigned int tendA = 0x200; const unsigned int tendB = 0xa00; if (strstr(ucontrol->id.name, "FU1E Capture Volume") || @@ -439,9 +440,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, regmap_read(rt721->mbq_regmap, mc->reg, &read_l); regmap_read(rt721->mbq_regmap, mc->rreg, &read_r); - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_l = read_l / tendB; - else { + } else if (mc->shift == 1) { + /* FU33 boost gain */ + if (read_l == 0x8000 || read_l == 0xfe00) + ctl_l = 0; + else + ctl_l = read_l / tendA + 1; + } else { if (adc_vol_flag) ctl_l = mc->max - (((0x1e00 - read_l) & 0xffff) / interval_offset); else @@ -449,9 +457,16 @@ static int rt721_sdca_set_gain_get(struct snd_kcontrol *kcontrol, } if (read_l != read_r) { - if (mc->shift == 8) /* boost gain */ + if (mc->shift == 8) { + /* boost gain */ ctl_r = read_r / tendB; - else { /* ADC/DAC gain */ + } else if (mc->shift == 1) { + /* FU33 boost gain */ + if (read_r == 0x8000 || read_r == 0xfe00) + ctl_r = 0; + else + ctl_r = read_r / tendA + 1; + } else { /* ADC/DAC gain */ if (adc_vol_flag) ctl_r = mc->max - (((0x1e00 - read_r) & 0xffff) / interval_offset); else From 6c038b58a2dc5a008c7e7a1297f5aaa4deaaaa7e Mon Sep 17 00:00:00 2001 From: Tamura Dai Date: Mon, 16 Jun 2025 08:55:48 +0900 Subject: [PATCH 497/497] ASoC: SOF: Intel: hda: Use devm_kstrdup() to avoid memleak. sof_pdata->tplg_filename can have address allocated by kstrdup() and can be overwritten. Memory leak was detected with kmemleak: unreferenced object 0xffff88812391ff60 (size 16): comm "kworker/4:1", pid 161, jiffies 4294802931 hex dump (first 16 bytes): 73 6f 66 2d 68 64 61 2d 67 65 6e 65 72 69 63 00 sof-hda-generic. backtrace (crc 4bf1675c): __kmalloc_node_track_caller_noprof+0x49c/0x6b0 kstrdup+0x46/0xc0 hda_machine_select.cold+0x1de/0x12cf [snd_sof_intel_hda_generic] sof_init_environment+0x16f/0xb50 [snd_sof] sof_probe_continue+0x45/0x7c0 [snd_sof] sof_probe_work+0x1e/0x40 [snd_sof] process_one_work+0x894/0x14b0 worker_thread+0x5e5/0xfb0 kthread+0x39d/0x760 ret_from_fork+0x31/0x70 ret_from_fork_asm+0x1a/0x30 Signed-off-by: Tamura Dai Link: https://patch.msgid.link/20250615235548.8591-1-kirinode0@gmail.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index bdfe388da198..3b47191ea7a5 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1257,11 +1257,11 @@ static int check_tplg_quirk_mask(struct snd_soc_acpi_mach *mach) return 0; } -static char *remove_file_ext(const char *tplg_filename) +static char *remove_file_ext(struct device *dev, const char *tplg_filename) { char *filename, *tmp; - filename = kstrdup(tplg_filename, GFP_KERNEL); + filename = devm_kstrdup(dev, tplg_filename, GFP_KERNEL); if (!filename) return NULL; @@ -1345,7 +1345,7 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) */ if (!sof_pdata->tplg_filename) { /* remove file extension if it exists */ - tplg_filename = remove_file_ext(mach->sof_tplg_filename); + tplg_filename = remove_file_ext(sdev->dev, mach->sof_tplg_filename); if (!tplg_filename) return NULL;